Analyser.java revision 1981:03d3d3c6bc5a
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAll;
23import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
24import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnAt;
25import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
26import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
27import static jdk.nashorn.internal.runtime.regexp.joni.Option.isMultiline;
28import static jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode.newAltNode;
29import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
30import java.util.HashSet;
31import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
32import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
33import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
34import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
35import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
36import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
37import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
38import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
39import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
40import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
41import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
42import jdk.nashorn.internal.runtime.regexp.joni.constants.StackPopLevel;
43import jdk.nashorn.internal.runtime.regexp.joni.constants.TargetInfo;
44import jdk.nashorn.internal.runtime.regexp.joni.encoding.ObjPtr;
45import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
46import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
47import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
48
49final class Analyser extends Parser {
50
51    protected Analyser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
52        super(env, chars, p, end);
53    }
54
55    @SuppressWarnings("unused")
56    protected final void compile() {
57        if (Config.DEBUG) {
58            Config.log.println(new String(chars, getBegin(), getEnd()));
59        }
60
61        reset();
62
63        regex.numMem = 0;
64        regex.numRepeat = 0;
65        regex.numNullCheck = 0;
66        //regex.repeatRangeAlloc = 0;
67        regex.repeatRangeLo = null;
68        regex.repeatRangeHi = null;
69
70        parse();
71
72        if (Config.DEBUG_PARSE_TREE_RAW && Config.DEBUG_PARSE_TREE) {
73            Config.log.println("<RAW TREE>");
74            Config.log.println(root + "\n");
75        }
76
77        root = setupTree(root, 0);
78        if (Config.DEBUG_PARSE_TREE) {
79            if (Config.DEBUG_PARSE_TREE_RAW) {
80                Config.log.println("<TREE>");
81            }
82            root.verifyTree(new HashSet<Node>(), env.reg.warnings);
83            Config.log.println(root + "\n");
84        }
85
86        regex.captureHistory = env.captureHistory;
87        regex.btMemStart = env.btMemStart;
88        regex.btMemEnd = env.btMemEnd;
89
90        if (isFindCondition(regex.options)) {
91            regex.btMemEnd = bsAll();
92        } else {
93            regex.btMemEnd = env.btMemEnd;
94            regex.btMemEnd |= regex.captureHistory;
95        }
96
97        regex.clearOptimizeInfo();
98
99        if (!Config.DONT_OPTIMIZE) {
100            setOptimizedInfoFromTree(root);
101        }
102
103        env.memNodes = null;
104
105        new ArrayCompiler(this).compile();
106
107        if (regex.numRepeat != 0 || regex.btMemEnd != 0) {
108            regex.stackPopLevel = StackPopLevel.ALL;
109        } else {
110            if (regex.btMemStart != 0) {
111                regex.stackPopLevel = StackPopLevel.MEM_START;
112            } else {
113                regex.stackPopLevel = StackPopLevel.FREE;
114            }
115        }
116
117        if (Config.DEBUG_COMPILE) {
118            Config.log.println("stack used: " + regex.stackNeeded);
119            if (Config.USE_STRING_TEMPLATES) {
120                Config.log.print("templates: " + regex.templateNum + "\n");
121            }
122            Config.log.println(new ByteCodePrinter(regex).byteCodeListToString());
123
124        } // DEBUG_COMPILE
125    }
126
127    private void swap(final Node a, final Node b) {
128        a.swap(b);
129
130        if (root == b) {
131            root = a;
132        } else if (root == a) {
133            root = b;
134        }
135    }
136
137    // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
138    private int quantifiersMemoryInfo(final Node node) {
139        int info = 0;
140
141        switch(node.getType()) {
142        case NodeType.LIST:
143        case NodeType.ALT:
144            ConsAltNode can = (ConsAltNode)node;
145            do {
146                final int v = quantifiersMemoryInfo(can.car);
147                if (v > info) {
148                    info = v;
149                }
150            } while ((can = can.cdr) != null);
151            break;
152
153        case NodeType.QTFR:
154            final QuantifierNode qn = (QuantifierNode)node;
155            if (qn.upper != 0) {
156                info = quantifiersMemoryInfo(qn.target);
157            }
158            break;
159
160        case NodeType.ENCLOSE:
161            final EncloseNode en = (EncloseNode)node;
162            switch (en.type) {
163            case EncloseType.MEMORY:
164                return TargetInfo.IS_EMPTY_MEM;
165
166            case EncloseType.OPTION:
167            case EncloseNode.STOP_BACKTRACK:
168                info = quantifiersMemoryInfo(en.target);
169                break;
170
171            default:
172                break;
173            } // inner switch
174            break;
175
176        case NodeType.BREF:
177        case NodeType.STR:
178        case NodeType.CTYPE:
179        case NodeType.CCLASS:
180        case NodeType.CANY:
181        case NodeType.ANCHOR:
182        default:
183            break;
184        } // switch
185
186        return info;
187    }
188
189    private int getMinMatchLength(final Node node) {
190        int min = 0;
191
192        switch (node.getType()) {
193        case NodeType.BREF:
194            final BackRefNode br = (BackRefNode)node;
195            if (br.isRecursion()) {
196                break;
197            }
198
199            if (br.backRef > env.numMem) {
200                throw new ValueException(ERR_INVALID_BACKREF);
201            }
202            min = getMinMatchLength(env.memNodes[br.backRef]);
203
204            break;
205
206        case NodeType.LIST:
207            ConsAltNode can = (ConsAltNode)node;
208            do {
209                min += getMinMatchLength(can.car);
210            } while ((can = can.cdr) != null);
211            break;
212
213        case NodeType.ALT:
214            ConsAltNode y = (ConsAltNode)node;
215            do {
216                final Node x = y.car;
217                final int tmin = getMinMatchLength(x);
218                if (y == node) {
219                    min = tmin;
220                } else if (min > tmin) {
221                    min = tmin;
222                }
223            } while ((y = y.cdr) != null);
224            break;
225
226        case NodeType.STR:
227            min = ((StringNode)node).length();
228            break;
229
230        case NodeType.CTYPE:
231            min = 1;
232            break;
233
234        case NodeType.CCLASS:
235        case NodeType.CANY:
236            min = 1;
237            break;
238
239        case NodeType.QTFR:
240            final QuantifierNode qn = (QuantifierNode)node;
241            if (qn.lower > 0) {
242                min = getMinMatchLength(qn.target);
243                min = MinMaxLen.distanceMultiply(min, qn.lower);
244            }
245            break;
246
247        case NodeType.ENCLOSE:
248            final EncloseNode en = (EncloseNode)node;
249            switch (en.type) {
250            case EncloseType.MEMORY:
251                if (en.isMinFixed()) {
252                    min = en.minLength;
253                } else {
254                    min = getMinMatchLength(en.target);
255                    en.minLength = min;
256                    en.setMinFixed();
257                }
258                break;
259
260            case EncloseType.OPTION:
261            case EncloseType.STOP_BACKTRACK:
262                min = getMinMatchLength(en.target);
263                break;
264
265            default:
266                break;
267            } // inner switch
268            break;
269
270        case NodeType.ANCHOR:
271        default:
272            break;
273        } // switch
274
275        return min;
276    }
277
278    private int getMaxMatchLength(final Node node) {
279        int max = 0;
280
281        switch (node.getType()) {
282        case NodeType.LIST:
283            ConsAltNode ln = (ConsAltNode)node;
284            do {
285                final int tmax = getMaxMatchLength(ln.car);
286                max = MinMaxLen.distanceAdd(max, tmax);
287            } while ((ln = ln.cdr) != null);
288            break;
289
290        case NodeType.ALT:
291            ConsAltNode an = (ConsAltNode)node;
292            do {
293                final int tmax = getMaxMatchLength(an.car);
294                if (max < tmax) {
295                    max = tmax;
296                }
297            } while ((an = an.cdr) != null);
298            break;
299
300        case NodeType.STR:
301            max = ((StringNode)node).length();
302            break;
303
304        case NodeType.CTYPE:
305            max = 1;
306            break;
307
308        case NodeType.CCLASS:
309        case NodeType.CANY:
310            max = 1;
311            break;
312
313        case NodeType.BREF:
314            final BackRefNode br = (BackRefNode)node;
315            if (br.isRecursion()) {
316                max = MinMaxLen.INFINITE_DISTANCE;
317                break;
318            }
319
320            if (br.backRef > env.numMem) {
321                throw new ValueException(ERR_INVALID_BACKREF);
322            }
323            final int tmax = getMaxMatchLength(env.memNodes[br.backRef]);
324            if (max < tmax) {
325                max = tmax;
326            }
327            break;
328
329        case NodeType.QTFR:
330            final QuantifierNode qn = (QuantifierNode)node;
331            if (qn.upper != 0) {
332                max = getMaxMatchLength(qn.target);
333                if (max != 0) {
334                    if (!isRepeatInfinite(qn.upper)) {
335                        max = MinMaxLen.distanceMultiply(max, qn.upper);
336                    } else {
337                        max = MinMaxLen.INFINITE_DISTANCE;
338                    }
339                }
340            }
341            break;
342
343        case NodeType.ENCLOSE:
344            final EncloseNode en = (EncloseNode)node;
345            switch (en.type) {
346            case EncloseType.MEMORY:
347                if (en.isMaxFixed()) {
348                    max = en.maxLength;
349                } else {
350                    max = getMaxMatchLength(en.target);
351                    en.maxLength = max;
352                    en.setMaxFixed();
353                }
354                break;
355
356            case EncloseType.OPTION:
357            case EncloseType.STOP_BACKTRACK:
358                max = getMaxMatchLength(en.target);
359                break;
360
361            default:
362                break;
363            } // inner switch
364            break;
365
366        case NodeType.ANCHOR:
367        default:
368            break;
369        } // switch
370
371        return max;
372    }
373
374    private static final int GET_CHAR_LEN_VARLEN            = -1;
375    private static final int GET_CHAR_LEN_TOP_ALT_VARLEN    = -2;
376    protected final int getCharLengthTree(final Node node) {
377        return getCharLengthTree(node, 0);
378    }
379
380    private int getCharLengthTree(final Node node, final int levelp) {
381        final int level = levelp + 1;
382
383        int len = 0;
384        returnCode = 0;
385
386        switch(node.getType()) {
387        case NodeType.LIST:
388            ConsAltNode ln = (ConsAltNode)node;
389            do {
390                final int tlen = getCharLengthTree(ln.car, level);
391                if (returnCode == 0) {
392                    len = MinMaxLen.distanceAdd(len, tlen);
393                }
394            } while (returnCode == 0 && (ln = ln.cdr) != null);
395            break;
396
397        case NodeType.ALT:
398            ConsAltNode an = (ConsAltNode)node;
399            boolean varLen = false;
400
401            int tlen = getCharLengthTree(an.car, level);
402            while (returnCode == 0 && (an = an.cdr) != null) {
403                final int tlen2 = getCharLengthTree(an.car, level);
404                if (returnCode == 0) {
405                    if (tlen != tlen2) {
406                        varLen = true;
407                    }
408                }
409            }
410
411            if (returnCode == 0) {
412                if (varLen) {
413                    if (level == 1) {
414                        returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN;
415                    } else {
416                        returnCode = GET_CHAR_LEN_VARLEN;
417                    }
418                } else {
419                    len = tlen;
420                }
421            }
422            break;
423
424        case NodeType.STR:
425            final StringNode sn = (StringNode)node;
426            len = sn.length();
427            break;
428
429        case NodeType.QTFR:
430            final QuantifierNode qn = (QuantifierNode)node;
431            if (qn.lower == qn.upper) {
432                tlen = getCharLengthTree(qn.target, level);
433                if (returnCode == 0) {
434                    len = MinMaxLen.distanceMultiply(tlen, qn.lower);
435                }
436            } else {
437                returnCode = GET_CHAR_LEN_VARLEN;
438            }
439            break;
440
441        case NodeType.CTYPE:
442        case NodeType.CCLASS:
443        case NodeType.CANY:
444            len = 1;
445            break;
446
447        case NodeType.ENCLOSE:
448            final EncloseNode en = (EncloseNode)node;
449            switch(en.type) {
450            case EncloseType.MEMORY:
451                if (en.isCLenFixed()) {
452                    len = en.charLength;
453                } else {
454                    len = getCharLengthTree(en.target, level);
455                    if (returnCode == 0) {
456                        en.charLength = len;
457                        en.setCLenFixed();
458                    }
459                }
460                break;
461
462            case EncloseType.OPTION:
463            case EncloseType.STOP_BACKTRACK:
464                len = getCharLengthTree(en.target, level);
465                break;
466
467            default:
468                break;
469            } // inner switch
470            break;
471
472        case NodeType.ANCHOR:
473            break;
474
475        default:
476            returnCode = GET_CHAR_LEN_VARLEN;
477        } // switch
478        return len;
479    }
480
481    /* x is not included y ==>  1 : 0 */
482    private static boolean isNotIncluded(final Node xn, final Node yn) {
483        Node x = xn;
484        Node y = yn;
485        Node tmp;
486
487        // !retry:!
488        retry: while(true) {
489
490        final int yType = y.getType();
491
492        switch(x.getType()) {
493        case NodeType.CTYPE:
494            switch(yType) {
495
496            case NodeType.CCLASS:
497                // !swap:!
498                tmp = x;
499                x = y;
500                y = tmp;
501                // !goto retry;!
502                continue retry;
503
504            case NodeType.STR:
505                // !goto swap;!
506                tmp = x;
507                x = y;
508                y = tmp;
509                continue retry;
510
511            default:
512                break;
513            } // inner switch
514            break;
515
516        case NodeType.CCLASS:
517            final CClassNode xc = (CClassNode)x;
518
519            switch(yType) {
520
521            case NodeType.CCLASS:
522                final CClassNode yc = (CClassNode)y;
523
524                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
525                    boolean v = xc.bs.at(i);
526                    if ((v && !xc.isNot()) || (!v && xc.isNot())) {
527                        v = yc.bs.at(i);
528                        if ((v && !yc.isNot()) || (!v && yc.isNot())) {
529                            return false;
530                        }
531                    }
532                }
533                if ((xc.mbuf == null && !xc.isNot()) || yc.mbuf == null && !yc.isNot()) {
534                    return true;
535                }
536                return false;
537                // break; not reached
538
539            case NodeType.STR:
540                // !goto swap;!
541                tmp = x;
542                x = y;
543                y = tmp;
544                continue retry;
545
546            default:
547                break;
548
549            } // inner switch
550            break; // case NodeType.CCLASS
551
552        case NodeType.STR:
553            final StringNode xs = (StringNode)x;
554            if (xs.length() == 0) {
555                break;
556            }
557
558            switch (yType) {
559
560            case NodeType.CCLASS:
561                final CClassNode cc = (CClassNode)y;
562                final int code = xs.chars[xs.p];
563                return !cc.isCodeInCC(code);
564
565            case NodeType.STR:
566                final StringNode ys = (StringNode)y;
567                int len = xs.length();
568                if (len > ys.length()) {
569                    len = ys.length();
570                }
571                if (xs.isAmbig() || ys.isAmbig()) {
572                    /* tiny version */
573                    return false;
574                }
575                for (int i=0, pt=ys.p, q=xs.p; i<len; i++, pt++, q++) {
576                    if (ys.chars[pt] != xs.chars[q]) {
577                        return true;
578                    }
579                }
580                break;
581
582            default:
583                break;
584            } // inner switch
585
586            break; // case NodeType.STR
587        default:
588            break;
589
590        } // switch
591
592        break;
593        } // retry: while
594        return false;
595    }
596
597    private Node getHeadValueNode(final Node node, final boolean exact) {
598        Node n = null;
599
600        switch(node.getType()) {
601        case NodeType.BREF:
602        case NodeType.ALT:
603        case NodeType.CANY:
604            break;
605
606        case NodeType.CTYPE:
607        case NodeType.CCLASS:
608            if (!exact) {
609                n = node;
610            }
611            break;
612
613        case NodeType.LIST:
614            n = getHeadValueNode(((ConsAltNode)node).car, exact);
615            break;
616
617        case NodeType.STR:
618            final StringNode sn = (StringNode)node;
619            if (sn.end <= sn.p)
620             {
621                break; // ???
622            }
623
624            if (exact && !sn.isRaw() && isIgnoreCase(regex.options)){
625                // nothing
626            } else {
627                n = node;
628            }
629            break;
630
631        case NodeType.QTFR:
632            final QuantifierNode qn = (QuantifierNode)node;
633            if (qn.lower > 0) {
634                if (qn.headExact != null) {
635                    n = qn.headExact;
636                } else {
637                    n = getHeadValueNode(qn.target, exact);
638                }
639            }
640            break;
641
642        case NodeType.ENCLOSE:
643            final EncloseNode en = (EncloseNode)node;
644
645            switch (en.type) {
646            case EncloseType.OPTION:
647                final int options = regex.options;
648                regex.options = en.option;
649                n = getHeadValueNode(en.target, exact);
650                regex.options = options;
651                break;
652
653            case EncloseType.MEMORY:
654            case EncloseType.STOP_BACKTRACK:
655                n = getHeadValueNode(en.target, exact);
656                break;
657
658            default:
659                break;
660            } // inner switch
661            break;
662
663        case NodeType.ANCHOR:
664            final AnchorNode an = (AnchorNode)node;
665            if (an.type == AnchorType.PREC_READ) {
666                n = getHeadValueNode(an.target, exact);
667            }
668            break;
669
670        default:
671            break;
672        } // switch
673
674        return n;
675    }
676
677    // true: invalid
678    private boolean checkTypeTree(final Node node, final int typeMask, final int encloseMask, final int anchorMask) {
679        if ((node.getType2Bit() & typeMask) == 0) {
680            return true;
681        }
682
683        boolean invalid = false;
684
685        switch(node.getType()) {
686        case NodeType.LIST:
687        case NodeType.ALT:
688            ConsAltNode can = (ConsAltNode)node;
689            do {
690                invalid = checkTypeTree(can.car, typeMask, encloseMask, anchorMask);
691            } while (!invalid && (can = can.cdr) != null);
692            break;
693
694        case NodeType.QTFR:
695            invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask);
696            break;
697
698        case NodeType.ENCLOSE:
699            final EncloseNode en = (EncloseNode)node;
700            if ((en.type & encloseMask) == 0) {
701                return true;
702            }
703            invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask);
704            break;
705
706        case NodeType.ANCHOR:
707            final AnchorNode an = (AnchorNode)node;
708            if ((an.type & anchorMask) == 0) {
709                return true;
710            }
711
712            if (an.target != null) {
713                invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask);
714            }
715            break;
716
717        default:
718            break;
719
720        } // switch
721
722        return invalid;
723    }
724
725    /* divide different length alternatives in look-behind.
726    (?<=A|B) ==> (?<=A)|(?<=B)
727    (?<!A|B) ==> (?<!A)(?<!B)
728     */
729    private Node divideLookBehindAlternatives(final Node nodep) {
730        Node node = nodep;
731        final AnchorNode an = (AnchorNode)node;
732        final int anchorType = an.type;
733        Node head = an.target;
734        Node np = ((ConsAltNode)head).car;
735
736        swap(node, head);
737
738        final Node tmp = node;
739        node = head;
740        head = tmp;
741
742        ((ConsAltNode)node).setCar(head);
743        ((AnchorNode)head).setTarget(np);
744        np = node;
745
746        while ((np = ((ConsAltNode)np).cdr) != null) {
747            final AnchorNode insert = new AnchorNode(anchorType);
748            insert.setTarget(((ConsAltNode)np).car);
749            ((ConsAltNode)np).setCar(insert);
750        }
751
752        if (anchorType == AnchorType.LOOK_BEHIND_NOT) {
753            np = node;
754            do {
755                ((ConsAltNode)np).toListNode(); /* alt -> list */
756            } while ((np = ((ConsAltNode)np).cdr) != null);
757        }
758
759        return node;
760    }
761
762    private Node setupLookBehind(final Node node) {
763        final AnchorNode an = (AnchorNode)node;
764        final int len = getCharLengthTree(an.target);
765        switch (returnCode) {
766        case 0:
767            an.charLength = len;
768            break;
769        case GET_CHAR_LEN_VARLEN:
770            throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
771        case GET_CHAR_LEN_TOP_ALT_VARLEN:
772            if (syntax.differentLengthAltLookBehind()) {
773                return divideLookBehindAlternatives(node);
774            }
775            throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
776        default:
777            break;
778        }
779        return node;
780    }
781
782    private void nextSetup(final Node nodep, final Node nextNode) {
783        Node node = nodep;
784
785        // retry:
786        retry: while(true) {
787
788        final int type = node.getType();
789        if (type == NodeType.QTFR) {
790            final QuantifierNode qn = (QuantifierNode)node;
791            if (qn.greedy && isRepeatInfinite(qn.upper)) {
792                if (Config.USE_QTFR_PEEK_NEXT) {
793                    final StringNode n = (StringNode)getHeadValueNode(nextNode, true);
794                    /* '\0': for UTF-16BE etc... */
795                    if (n != null && n.chars[n.p] != 0) { // ?????????
796                        qn.nextHeadExact = n;
797                    }
798                } // USE_QTFR_PEEK_NEXT
799                /* automatic posseivation a*b ==> (?>a*)b */
800                if (qn.lower <= 1) {
801                    if (qn.target.isSimple()) {
802                        final Node x = getHeadValueNode(qn.target, false);
803                        if (x != null) {
804                            final Node y = getHeadValueNode(nextNode, false);
805                            if (y != null && isNotIncluded(x, y)) {
806                                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); //onig_node_new_enclose
807                                en.setStopBtSimpleRepeat();
808                                //en.setTarget(qn.target); // optimize it ??
809                                swap(node, en);
810
811                                en.setTarget(node);
812                            }
813                        }
814                    }
815                }
816            }
817        } else if (type == NodeType.ENCLOSE) {
818            final EncloseNode en = (EncloseNode)node;
819            if (en.isMemory()) {
820                node = en.target;
821                // !goto retry;!
822                continue retry;
823            }
824        }
825
826        break;
827        } // while
828    }
829
830    private void updateStringNodeCaseFoldMultiByte(final StringNode sn) {
831        final char[] ch = sn.chars;
832        final int end = sn.end;
833        value = sn.p;
834        int sp = 0;
835        char buf;
836
837        while (value < end) {
838            final int ovalue = value;
839            buf = EncodingHelper.toLowerCase(ch[value++]);
840
841            if (ch[ovalue] != buf) {
842
843                char[] sbuf = new char[sn.length() << 1];
844                System.arraycopy(ch, sn.p, sbuf, 0, ovalue - sn.p);
845                value = ovalue;
846                while (value < end) {
847                    buf = EncodingHelper.toLowerCase(ch[value++]);
848                    if (sp >= sbuf.length) {
849                        final char[]tmp = new char[sbuf.length << 1];
850                        System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
851                        sbuf = tmp;
852                    }
853                    sbuf[sp++] = buf;
854                }
855                sn.set(sbuf, 0, sp);
856                return;
857            }
858            sp++;
859        }
860    }
861
862    private void updateStringNodeCaseFold(final Node node) {
863        final StringNode sn = (StringNode)node;
864        updateStringNodeCaseFoldMultiByte(sn);
865    }
866
867    private Node expandCaseFoldMakeRemString(final char[] ch, final int pp, final int end) {
868        final StringNode node = new StringNode(ch, pp, end);
869
870        updateStringNodeCaseFold(node);
871        node.setAmbig();
872        node.setDontGetOptInfo();
873        return node;
874    }
875
876    private static boolean expandCaseFoldStringAlt(final int itemNum, final char[] items,
877                                              final char[] chars, final int p, final int slen, final int end, final ObjPtr<Node> node) {
878
879        ConsAltNode altNode;
880        node.p = altNode = newAltNode(null, null);
881
882        StringNode snode = new StringNode(chars, p, p + slen);
883        altNode.setCar(snode);
884
885        for (int i=0; i<itemNum; i++) {
886            snode = new StringNode();
887
888            snode.catCode(items[i]);
889
890            final ConsAltNode an = newAltNode(null, null);
891            an.setCar(snode);
892            altNode.setCdr(an);
893            altNode = an;
894        }
895        return false;
896    }
897
898    private static final int THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION = 8;
899    private Node expandCaseFoldString(final Node node) {
900        final StringNode sn = (StringNode)node;
901
902        if (sn.isAmbig() || sn.length() <= 0) {
903            return node;
904        }
905
906        final char[] chars1 = sn.chars;
907        int pt = sn.p;
908        final int end = sn.end;
909        int altNum = 1;
910
911        ConsAltNode topRoot = null, r = null;
912        @SuppressWarnings("unused")
913        final ObjPtr<Node> prevNode = new ObjPtr<Node>();
914        StringNode stringNode = null;
915
916        while (pt < end) {
917            final char[] items = EncodingHelper.caseFoldCodesByString(regex.caseFoldFlag, chars1[pt]);
918
919            if (items.length == 0) {
920                if (stringNode == null) {
921                    if (r == null && prevNode.p != null) {
922                        topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
923                    }
924
925                    prevNode.p = stringNode = new StringNode(); // onig_node_new_str(NULL, NULL);
926
927                    if (r != null) {
928                        ConsAltNode.listAdd(r, stringNode);
929                    }
930
931                }
932
933                stringNode.cat(chars1, pt, pt + 1);
934            } else {
935                altNum *= (items.length + 1);
936                if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
937                    break;
938                }
939
940                if (r == null && prevNode.p != null) {
941                    topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
942                }
943
944                expandCaseFoldStringAlt(items.length, items, chars1, pt, 1, end, prevNode);
945                if (r != null) {
946                    ConsAltNode.listAdd(r, prevNode.p);
947                }
948                stringNode = null;
949            }
950            pt++;
951        }
952
953        if (pt < end) {
954            final Node srem = expandCaseFoldMakeRemString(chars1, pt, end);
955
956            if (prevNode.p != null && r == null) {
957                topRoot = r = ConsAltNode.listAdd(null, prevNode.p);
958            }
959
960            if (r == null) {
961                prevNode.p = srem;
962            } else {
963                ConsAltNode.listAdd(r, srem);
964            }
965        }
966        /* ending */
967        final Node xnode = topRoot != null ? topRoot : prevNode.p;
968
969        swap(node, xnode);
970        return xnode;
971    }
972
973    private static final int IN_ALT                     = (1<<0);
974    private static final int IN_NOT                     = (1<<1);
975    private static final int IN_REPEAT                  = (1<<2);
976    private static final int IN_VAR_REPEAT              = (1<<3);
977    private static final int EXPAND_STRING_MAX_LENGTH   = 100;
978
979    /* setup_tree does the following work.
980    1. check empty loop. (set qn->target_empty_info)
981    2. expand ignore-case in char class.
982    3. set memory status bit flags. (reg->mem_stats)
983    4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
984    5. find invalid patterns in look-behind.
985    6. expand repeated string.
986    */
987    protected final Node setupTree(final Node nodep, final int statep) {
988        Node node = nodep;
989        int state = statep;
990
991        restart: while (true) {
992        switch (node.getType()) {
993        case NodeType.LIST:
994            ConsAltNode lin = (ConsAltNode)node;
995            Node prev = null;
996            do {
997                setupTree(lin.car, state);
998                if (prev != null) {
999                    nextSetup(prev, lin.car);
1000                }
1001                prev = lin.car;
1002            } while ((lin = lin.cdr) != null);
1003            break;
1004
1005        case NodeType.ALT:
1006            ConsAltNode aln = (ConsAltNode)node;
1007            do {
1008                setupTree(aln.car, (state | IN_ALT));
1009            } while ((aln = aln.cdr) != null);
1010            break;
1011
1012        case NodeType.CCLASS:
1013            break;
1014
1015        case NodeType.STR:
1016            if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) {
1017                node = expandCaseFoldString(node);
1018            }
1019            break;
1020
1021        case NodeType.CTYPE:
1022        case NodeType.CANY:
1023            break;
1024
1025        case NodeType.BREF:
1026            final BackRefNode br = (BackRefNode)node;
1027            if (br.backRef > env.numMem) {
1028                throw new ValueException(ERR_INVALID_BACKREF);
1029            }
1030            env.backrefedMem = bsOnAt(env.backrefedMem, br.backRef);
1031            env.btMemStart = bsOnAt(env.btMemStart, br.backRef);
1032            ((EncloseNode)env.memNodes[br.backRef]).setMemBackrefed();
1033            break;
1034
1035        case NodeType.QTFR:
1036            final QuantifierNode qn = (QuantifierNode)node;
1037            Node target = qn.target;
1038
1039            if ((state & IN_REPEAT) != 0) {
1040                qn.setInRepeat();
1041            }
1042
1043            if (isRepeatInfinite(qn.upper) || qn.lower >= 1) {
1044                final int d = getMinMatchLength(target);
1045                if (d == 0) {
1046                    qn.targetEmptyInfo = TargetInfo.IS_EMPTY;
1047                    if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
1048                        final int info = quantifiersMemoryInfo(target);
1049                        if (info > 0) {
1050                            qn.targetEmptyInfo = info;
1051                        }
1052                    } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
1053                    // strange stuff here (turned off)
1054                }
1055            }
1056
1057            state |= IN_REPEAT;
1058            if (qn.lower != qn.upper) {
1059                state |= IN_VAR_REPEAT;
1060            }
1061
1062            target = setupTree(target, state);
1063
1064            /* expand string */
1065            if (target.getType() == NodeType.STR) {
1066                if (!isRepeatInfinite(qn.lower) && qn.lower == qn.upper &&
1067                    qn.lower > 1 && qn.lower <= EXPAND_STRING_MAX_LENGTH) {
1068                    final StringNode sn = (StringNode)target;
1069                    final int len = sn.length();
1070
1071                    if (len * qn.lower <= EXPAND_STRING_MAX_LENGTH) {
1072                        final StringNode str = qn.convertToString(sn.flag);
1073                        final int n = qn.lower;
1074                        for (int i = 0; i < n; i++) {
1075                            str.cat(sn.chars, sn.p, sn.end);
1076                        }
1077                        break; /* break case NT_QTFR: */
1078                    }
1079
1080                }
1081            }
1082            if (Config.USE_OP_PUSH_OR_JUMP_EXACT) {
1083                if (qn.greedy && qn.targetEmptyInfo != 0) {
1084                    if (target.getType() == NodeType.QTFR) {
1085                        final QuantifierNode tqn = (QuantifierNode)target;
1086                        if (tqn.headExact != null) {
1087                            qn.headExact = tqn.headExact;
1088                            tqn.headExact = null;
1089                        }
1090                    } else {
1091                        qn.headExact = getHeadValueNode(qn.target, true);
1092                    }
1093                }
1094            } // USE_OP_PUSH_OR_JUMP_EXACT
1095            break;
1096
1097        case NodeType.ENCLOSE:
1098            final EncloseNode en = (EncloseNode)node;
1099            switch (en.type) {
1100            case EncloseType.OPTION:
1101                final int options = regex.options;
1102                regex.options = en.option;
1103                setupTree(en.target, state);
1104                regex.options = options;
1105                break;
1106
1107            case EncloseType.MEMORY:
1108                if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
1109                    env.btMemStart = bsOnAt(env.btMemStart, en.regNum);
1110                    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
1111
1112                }
1113                setupTree(en.target, state);
1114                break;
1115
1116            case EncloseType.STOP_BACKTRACK:
1117                setupTree(en.target, state);
1118                if (en.target.getType() == NodeType.QTFR) {
1119                    final QuantifierNode tqn = (QuantifierNode)en.target;
1120                    if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) {
1121                        /* (?>a*), a*+ etc... */
1122                        if (tqn.target.isSimple()) {
1123                            en.setStopBtSimpleRepeat();
1124                        }
1125                    }
1126                }
1127                break;
1128
1129            default:
1130                break;
1131
1132            } // inner switch
1133            break;
1134
1135        case NodeType.ANCHOR:
1136            final AnchorNode an = (AnchorNode)node;
1137            switch (an.type) {
1138            case AnchorType.PREC_READ:
1139                setupTree(an.target, state);
1140                break;
1141
1142            case AnchorType.PREC_READ_NOT:
1143                setupTree(an.target, (state | IN_NOT));
1144                break;
1145
1146            case AnchorType.LOOK_BEHIND:
1147                if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
1148                    throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
1149                }
1150                node = setupLookBehind(node);
1151                if (node.getType() != NodeType.ANCHOR) {
1152                    continue restart;
1153                }
1154                setupTree(((AnchorNode)node).target, state);
1155                break;
1156
1157            case AnchorType.LOOK_BEHIND_NOT:
1158                if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) {
1159                    throw new SyntaxException(ERR_INVALID_LOOK_BEHIND_PATTERN);
1160                }
1161                node = setupLookBehind(node);
1162                if (node.getType() != NodeType.ANCHOR) {
1163                    continue restart;
1164                }
1165                setupTree(((AnchorNode)node).target, (state | IN_NOT));
1166                break;
1167
1168            default:
1169                break;
1170
1171            } // inner switch
1172            break;
1173        default:
1174            break;
1175        } // switch
1176        return node;
1177        } // restart: while
1178    }
1179
1180    private static final int MAX_NODE_OPT_INFO_REF_COUNT   = 5;
1181    private void optimizeNodeLeft(final Node node, final NodeOptInfo opt, final OptEnvironment oenv) { // oenv remove, pass mmd
1182        opt.clear();
1183        opt.setBoundNode(oenv.mmd);
1184
1185        switch (node.getType()) {
1186        case NodeType.LIST: {
1187            final OptEnvironment nenv = new OptEnvironment();
1188            final NodeOptInfo nopt = new NodeOptInfo();
1189            nenv.copy(oenv);
1190            ConsAltNode lin = (ConsAltNode)node;
1191            do {
1192                optimizeNodeLeft(lin.car, nopt, nenv);
1193                nenv.mmd.add(nopt.length);
1194                opt.concatLeftNode(nopt);
1195            } while ((lin = lin.cdr) != null);
1196            break;
1197        }
1198
1199        case NodeType.ALT: {
1200            final NodeOptInfo nopt = new NodeOptInfo();
1201            ConsAltNode aln = (ConsAltNode)node;
1202            do {
1203                optimizeNodeLeft(aln.car, nopt, oenv);
1204                if (aln == node) {
1205                    opt.copy(nopt);
1206                } else {
1207                    opt.altMerge(nopt, oenv);
1208                }
1209            } while ((aln = aln.cdr) != null);
1210            break;
1211        }
1212
1213        case NodeType.STR: {
1214            final StringNode sn = (StringNode)node;
1215
1216            final int slen = sn.length();
1217
1218            if (!sn.isAmbig()) {
1219                opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
1220
1221                if (slen > 0) {
1222                    opt.map.addChar(sn.chars[sn.p]);
1223                }
1224
1225                opt.length.set(slen, slen);
1226            } else {
1227                int max;
1228                if (sn.isDontGetOptInfo()) {
1229                    max = sn.length();
1230                } else {
1231                    opt.exb.concatStr(sn.chars, sn.p, sn.end, sn.isRaw());
1232                    opt.exb.ignoreCase = true;
1233
1234                    if (slen > 0) {
1235                        opt.map.addCharAmb(sn.chars, sn.p, sn.end, oenv.caseFoldFlag);
1236                    }
1237
1238                    max = slen;
1239                }
1240                opt.length.set(slen, max);
1241            }
1242
1243            if (opt.exb.length == slen) {
1244                opt.exb.reachEnd = true;
1245            }
1246            break;
1247        }
1248
1249        case NodeType.CCLASS: {
1250            final CClassNode cc = (CClassNode)node;
1251            /* no need to check ignore case. (setted in setup_tree()) */
1252            if (cc.mbuf != null || cc.isNot()) {
1253                opt.length.set(1, 1);
1254            } else {
1255                for (int i=0; i<BitSet.SINGLE_BYTE_SIZE; i++) {
1256                    final boolean z = cc.bs.at(i);
1257                    if ((z && !cc.isNot()) || (!z && cc.isNot())) {
1258                        opt.map.addChar(i);
1259                    }
1260                }
1261                opt.length.set(1, 1);
1262            }
1263            break;
1264        }
1265
1266        case NodeType.CANY: {
1267            opt.length.set(1, 1);
1268            break;
1269        }
1270
1271        case NodeType.ANCHOR: {
1272            final AnchorNode an = (AnchorNode)node;
1273            switch (an.type) {
1274            case AnchorType.BEGIN_BUF:
1275            case AnchorType.BEGIN_POSITION:
1276            case AnchorType.BEGIN_LINE:
1277            case AnchorType.END_BUF:
1278            case AnchorType.SEMI_END_BUF:
1279            case AnchorType.END_LINE:
1280                opt.anchor.add(an.type);
1281                break;
1282
1283            case AnchorType.PREC_READ:
1284                final NodeOptInfo nopt = new NodeOptInfo();
1285                optimizeNodeLeft(an.target, nopt, oenv);
1286                if (nopt.exb.length > 0) {
1287                    opt.expr.copy(nopt.exb);
1288                } else if (nopt.exm.length > 0) {
1289                    opt.expr.copy(nopt.exm);
1290                }
1291                opt.expr.reachEnd = false;
1292                if (nopt.map.value > 0) {
1293                    opt.map.copy(nopt.map);
1294                }
1295                break;
1296
1297            case AnchorType.PREC_READ_NOT:
1298            case AnchorType.LOOK_BEHIND:    /* Sorry, I can't make use of it. */
1299            case AnchorType.LOOK_BEHIND_NOT:
1300                break;
1301
1302            default:
1303                break;
1304
1305            } // inner switch
1306            break;
1307        }
1308
1309        case NodeType.BREF: {
1310            final BackRefNode br = (BackRefNode)node;
1311
1312            if (br.isRecursion()) {
1313                opt.length.set(0, MinMaxLen.INFINITE_DISTANCE);
1314                break;
1315            }
1316
1317            final Node[]nodes = oenv.scanEnv.memNodes;
1318
1319            final int min = getMinMatchLength(nodes[br.backRef]);
1320            final int max = getMaxMatchLength(nodes[br.backRef]);
1321
1322            opt.length.set(min, max);
1323            break;
1324        }
1325
1326
1327        case NodeType.QTFR: {
1328            final NodeOptInfo nopt = new NodeOptInfo();
1329            final QuantifierNode qn = (QuantifierNode)node;
1330            optimizeNodeLeft(qn.target, nopt, oenv);
1331            if (qn.lower == 0 && isRepeatInfinite(qn.upper)) {
1332                if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) {
1333                    if (isMultiline(oenv.options)) {
1334                        opt.anchor.add(AnchorType.ANYCHAR_STAR_ML);
1335                    } else {
1336                        opt.anchor.add(AnchorType.ANYCHAR_STAR);
1337                    }
1338                }
1339            } else {
1340                if (qn.lower > 0) {
1341                    opt.copy(nopt);
1342                    if (nopt.exb.length > 0) {
1343                        if (nopt.exb.reachEnd) {
1344                            int i;
1345                            for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) {
1346                                opt.exb.concat(nopt.exb);
1347                            }
1348                            if (i < qn.lower) {
1349                                opt.exb.reachEnd = false;
1350                            }
1351                        }
1352                    }
1353                    if (qn.lower != qn.upper) {
1354                        opt.exb.reachEnd = false;
1355                        opt.exm.reachEnd = false;
1356                    }
1357                    if (qn.lower > 1) {
1358                        opt.exm.reachEnd = false;
1359                    }
1360
1361                }
1362            }
1363            final int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower);
1364            int max;
1365            if (isRepeatInfinite(qn.upper)) {
1366                max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0;
1367            } else {
1368                max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper);
1369            }
1370            opt.length.set(min, max);
1371            break;
1372        }
1373
1374        case NodeType.ENCLOSE: {
1375            final EncloseNode en = (EncloseNode)node;
1376            switch (en.type) {
1377            case EncloseType.OPTION:
1378                final int save = oenv.options;
1379                oenv.options = en.option;
1380                optimizeNodeLeft(en.target, opt, oenv);
1381                oenv.options = save;
1382                break;
1383
1384            case EncloseType.MEMORY:
1385                if (++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) {
1386                    int min = 0;
1387                    int max = MinMaxLen.INFINITE_DISTANCE;
1388                    if (en.isMinFixed()) {
1389                        min = en.minLength;
1390                    }
1391                    if (en.isMaxFixed()) {
1392                        max = en.maxLength;
1393                    }
1394                    opt.length.set(min, max);
1395                } else { // USE_SUBEXP_CALL
1396                    optimizeNodeLeft(en.target, opt, oenv);
1397                    if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) {
1398                        if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) {
1399                            opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK);
1400                        }
1401                    }
1402                }
1403                break;
1404
1405            case EncloseType.STOP_BACKTRACK:
1406                optimizeNodeLeft(en.target, opt, oenv);
1407                break;
1408
1409            default:
1410                break;
1411            } // inner switch
1412            break;
1413        }
1414
1415        default:
1416            throw new InternalException(ERR_PARSER_BUG);
1417        } // switch
1418    }
1419
1420    @SuppressWarnings("unused")
1421    protected final void setOptimizedInfoFromTree(final Node node) {
1422        final NodeOptInfo opt = new NodeOptInfo();
1423        final OptEnvironment oenv = new OptEnvironment();
1424
1425        oenv.options = regex.options;
1426        oenv.caseFoldFlag = regex.caseFoldFlag;
1427        oenv.scanEnv = env;
1428        oenv.mmd.clear(); // ??
1429
1430        optimizeNodeLeft(node, opt, oenv);
1431
1432        regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF |
1433                                                AnchorType.BEGIN_POSITION |
1434                                                AnchorType.ANYCHAR_STAR |
1435                                                AnchorType.ANYCHAR_STAR_ML);
1436
1437        regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF |
1438                                                  AnchorType.SEMI_END_BUF);
1439
1440        if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) {
1441            regex.anchorDmin = opt.length.min;
1442            regex.anchorDmax = opt.length.max;
1443        }
1444
1445        if (opt.exb.length > 0 || opt.exm.length > 0) {
1446            opt.exb.select(opt.exm);
1447            if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) {
1448                // !goto set_map;!
1449                regex.setOptimizeMapInfo(opt.map);
1450                regex.setSubAnchor(opt.map.anchor);
1451            } else {
1452                regex.setExactInfo(opt.exb);
1453                regex.setSubAnchor(opt.exb.anchor);
1454            }
1455        } else if (opt.map.value > 0) {
1456            // !set_map:!
1457            regex.setOptimizeMapInfo(opt.map);
1458            regex.setSubAnchor(opt.map.anchor);
1459        } else {
1460            regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE;
1461            if (opt.length.max == 0) {
1462                regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE;
1463            }
1464        }
1465
1466        if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) {
1467            Config.log.println(regex.optimizeInfoToString());
1468        }
1469    }
1470}
1471