1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsOnOff;
23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isDontCaptureGroup;
24import static jdk.nashorn.internal.runtime.regexp.joni.Option.isIgnoreCase;
25import jdk.nashorn.internal.runtime.regexp.joni.ast.AnchorNode;
26import jdk.nashorn.internal.runtime.regexp.joni.ast.AnyCharNode;
27import jdk.nashorn.internal.runtime.regexp.joni.ast.BackRefNode;
28import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
29import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode.CCStateArg;
30import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
31import jdk.nashorn.internal.runtime.regexp.joni.ast.EncloseNode;
32import jdk.nashorn.internal.runtime.regexp.joni.ast.Node;
33import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
34import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
35import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
36import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
37import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
38import jdk.nashorn.internal.runtime.regexp.joni.constants.EncloseType;
39import jdk.nashorn.internal.runtime.regexp.joni.constants.NodeType;
40import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
41import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
42import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
43import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
44import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
45
46class Parser extends Lexer {
47
48    protected final Regex regex;
49    protected Node root;
50
51    protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
52                              // this approach will not affect recursive calls
53
54    protected Parser(final ScanEnvironment env, final char[] chars, final int p, final int end) {
55        super(env, chars, p, end);
56        regex = env.reg;
57    }
58
59    // onig_parse_make_tree
60    protected final Node parse() {
61        root = parseRegexp();
62        regex.numMem = env.numMem;
63        return root;
64    }
65
66    private boolean codeExistCheck(final int code, final boolean ignoreEscaped) {
67        mark();
68
69        boolean inEsc = false;
70        while (left()) {
71            if (ignoreEscaped && inEsc) {
72                inEsc = false;
73            } else {
74                fetch();
75                if (c == code) {
76                    restore();
77                    return true;
78                }
79                if (c == syntax.metaCharTable.esc) {
80                    inEsc = true;
81                }
82            }
83        }
84
85        restore();
86        return false;
87    }
88
89    private CClassNode parseCharClass() {
90        fetchTokenInCC();
91
92        final boolean neg;
93        if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) {
94            neg = true;
95            fetchTokenInCC();
96        } else {
97            neg = false;
98        }
99
100        if (token.type == TokenType.CC_CLOSE) {
101            if (!codeExistCheck(']', true)) {
102                throw new SyntaxException(ERR_EMPTY_CHAR_CLASS);
103            }
104            env.ccEscWarn("]");
105            token.type = TokenType.CHAR; /* allow []...] */
106        }
107
108        CClassNode cc = new CClassNode();
109        CClassNode prevCC = null;
110        CClassNode workCC = null;
111
112        final CCStateArg arg = new CCStateArg();
113
114        boolean andStart = false;
115        arg.state = CCSTATE.START;
116
117        while (token.type != TokenType.CC_CLOSE) {
118            boolean fetched = false;
119
120            switch (token.type) {
121
122            case CHAR:
123                if (token.getC() > 0xff) {
124                    arg.inType = CCVALTYPE.CODE_POINT;
125                } else {
126                    arg.inType = CCVALTYPE.SB; // sb_char:
127                }
128                arg.v = token.getC();
129                arg.vIsRaw = false;
130                parseCharClassValEntry2(cc, arg); // goto val_entry2
131                break;
132
133            case RAW_BYTE:
134                arg.v = token.getC();
135                arg.inType = CCVALTYPE.SB; // raw_single:
136                arg.vIsRaw = true;
137                parseCharClassValEntry2(cc, arg); // goto val_entry2
138                break;
139
140            case CODE_POINT:
141                arg.v = token.getCode();
142                arg.vIsRaw = true;
143                parseCharClassValEntry(cc, arg); // val_entry:, val_entry2
144                break;
145
146            case CHAR_TYPE:
147                cc.addCType(token.getPropCType(), token.getPropNot(), env, this);
148                cc.nextStateClass(arg, env); // next_class:
149                break;
150
151            case CC_RANGE:
152                if (arg.state == CCSTATE.VALUE) {
153                    fetchTokenInCC();
154                    fetched = true;
155                    if (token.type == TokenType.CC_CLOSE) { /* allow [x-] */
156                        parseCharClassRangeEndVal(cc, arg); // range_end_val:, goto val_entry;
157                        break;
158                    } else if (token.type == TokenType.CC_AND) {
159                        env.ccEscWarn("-");
160                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
161                        break;
162                    }
163                    arg.state = CCSTATE.RANGE;
164                } else if (arg.state == CCSTATE.START) {
165                    arg.v = token.getC(); /* [-xa] is allowed */
166                    arg.vIsRaw = false;
167                    fetchTokenInCC();
168                    fetched = true;
169                    if (token.type == TokenType.CC_RANGE || andStart) {
170                        env.ccEscWarn("-"); /* [--x] or [a&&-x] is warned. */
171                    }
172                    parseCharClassValEntry(cc, arg); // goto val_entry
173                    break;
174                } else if (arg.state == CCSTATE.RANGE) {
175                    env.ccEscWarn("-");
176                    parseCharClassSbChar(cc, arg); // goto sb_char /* [!--x] is allowed */
177                    break;
178                } else { /* CCS_COMPLETE */
179                    fetchTokenInCC();
180                    fetched = true;
181                    if (token.type == TokenType.CC_CLOSE) { /* allow [a-b-] */
182                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
183                        break;
184                    } else if (token.type == TokenType.CC_AND) {
185                        env.ccEscWarn("-");
186                        parseCharClassRangeEndVal(cc, arg); // goto range_end_val
187                        break;
188                    }
189
190                    if (syntax.allowDoubleRangeOpInCC()) {
191                        env.ccEscWarn("-");
192                        arg.inType = CCVALTYPE.SB;
193                        arg.v = '-';
194                        arg.vIsRaw = false;
195                        parseCharClassValEntry2(cc, arg); // goto val_entry2 /* [0-9-a] is allowed as [0-9\-a] */
196                        break;
197                    }
198                    throw new SyntaxException(ERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS);
199                }
200                break;
201
202            case CC_CC_OPEN: /* [ */
203                final CClassNode acc = parseCharClass();
204                cc.or(acc);
205                break;
206
207            case CC_AND:     /* && */
208                if (arg.state == CCSTATE.VALUE) {
209                    arg.v = 0; // ??? safe v ?
210                    arg.vIsRaw = false;
211                    cc.nextStateValue(arg, env);
212                }
213                /* initialize local variables */
214                andStart = true;
215                arg.state = CCSTATE.START;
216                if (prevCC != null) {
217                    prevCC.and(cc);
218                } else {
219                    prevCC = cc;
220                    if (workCC == null) {
221                        workCC = new CClassNode();
222                    }
223                    cc = workCC;
224                }
225                cc.clear();
226                break;
227
228            case EOT:
229                throw new SyntaxException(ERR_PREMATURE_END_OF_CHAR_CLASS);
230
231            default:
232                throw new InternalException(ERR_PARSER_BUG);
233            } // switch
234
235            if (!fetched) {
236                fetchTokenInCC();
237            }
238
239        } // while
240
241        if (arg.state == CCSTATE.VALUE) {
242            arg.v = 0; // ??? safe v ?
243            arg.vIsRaw = false;
244            cc.nextStateValue(arg, env);
245        }
246
247        if (prevCC != null) {
248            prevCC.and(cc);
249            cc = prevCC;
250        }
251
252        if (neg) {
253            cc.setNot();
254        } else {
255            cc.clearNot();
256        }
257
258        if (cc.isNot() && syntax.notNewlineInNegativeCC()) {
259            if (!cc.isEmpty()) {
260                final int NEW_LINE = 0x0a;
261                if (EncodingHelper.isNewLine(NEW_LINE)) {
262                    cc.bs.set(NEW_LINE);
263                }
264            }
265        }
266
267        return cc;
268    }
269
270    private void parseCharClassSbChar(final CClassNode cc, final CCStateArg arg) {
271        arg.inType = CCVALTYPE.SB;
272        arg.v = token.getC();
273        arg.vIsRaw = false;
274        parseCharClassValEntry2(cc, arg); // goto val_entry2
275    }
276
277    private void parseCharClassRangeEndVal(final CClassNode cc, final CCStateArg arg) {
278        arg.v = '-';
279        arg.vIsRaw = false;
280        parseCharClassValEntry(cc, arg); // goto val_entry
281    }
282
283    private void parseCharClassValEntry(final CClassNode cc, final CCStateArg arg) {
284        arg.inType = arg.v <= 0xff ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT;
285        parseCharClassValEntry2(cc, arg); // val_entry2:
286    }
287
288    private void parseCharClassValEntry2(final CClassNode cc, final CCStateArg arg) {
289        cc.nextStateValue(arg, env);
290    }
291
292    private Node parseEnclose(final TokenType term) {
293        Node node = null;
294
295        if (!left()) {
296            throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
297        }
298
299        int option = env.option;
300
301        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
302            inc();
303            if (!left()) {
304                throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
305            }
306
307            fetch();
308            switch(c) {
309            case ':':  /* (?:...) grouping only */
310                fetchToken(); // group:
311                node = parseSubExp(term);
312                returnCode = 1; /* group */
313                return node;
314            case '=':
315                node = new AnchorNode(AnchorType.PREC_READ);
316                break;
317            case '!':  /*         preceding read */
318                node = new AnchorNode(AnchorType.PREC_READ_NOT);
319                break;
320            case '>':  /* (?>...) stop backtrack */
321                node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
322                break;
323            case '\'':
324                break;
325            case '<':  /* look behind (?<=...), (?<!...) */
326                fetch();
327                if (c == '=') {
328                    node = new AnchorNode(AnchorType.LOOK_BEHIND);
329                } else if (c == '!') {
330                    node = new AnchorNode(AnchorType.LOOK_BEHIND_NOT);
331                } else {
332                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
333                }
334                break;
335            case '@':
336                if (syntax.op2AtMarkCaptureHistory()) {
337                    final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
338                    final int num = env.addMemEntry();
339                    if (num >= BitStatus.BIT_STATUS_BITS_NUM) {
340                        throw new ValueException(ERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
341                    }
342                    en.regNum = num;
343                    node = en;
344                } else {
345                    throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
346                }
347                break;
348
349            // case 'p': #ifdef USE_POSIXLINE_OPTION
350            case '-':
351            case 'i':
352            case 'm':
353            case 's':
354            case 'x':
355                boolean neg = false;
356                while (true) {
357                    switch(c) {
358                    case ':':
359                    case ')':
360                        break;
361                    case '-':
362                        neg = true;
363                        break;
364                    case 'x':
365                        option = bsOnOff(option, Option.EXTEND, neg);
366                        break;
367                    case 'i':
368                        option = bsOnOff(option, Option.IGNORECASE, neg);
369                        break;
370                    case 's':
371                        if (syntax.op2OptionPerl()) {
372                            option = bsOnOff(option, Option.MULTILINE, neg);
373                        } else {
374                            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
375                        }
376                        break;
377                    case 'm':
378                        if (syntax.op2OptionPerl()) {
379                            option = bsOnOff(option, Option.SINGLELINE, !neg);
380                        } else if (syntax.op2OptionRuby()) {
381                            option = bsOnOff(option, Option.MULTILINE, neg);
382                        } else {
383                            throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
384                        }
385                        break;
386                    // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined
387                    // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg);
388                    // break;
389
390                    default:
391                        throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
392                    } // switch
393
394                    if (c == ')') {
395                        final EncloseNode en = new EncloseNode(option, 0); // node_new_option
396                        node = en;
397                        returnCode = 2; /* option only */
398                        return node;
399                    } else if (c == ':') {
400                        final int prev = env.option;
401                        env.option = option;
402                        fetchToken();
403                        final Node target = parseSubExp(term);
404                        env.option = prev;
405                        final EncloseNode en = new EncloseNode(option, 0); // node_new_option
406                        en.setTarget(target);
407                        node = en;
408                        returnCode = 0;
409                        return node;
410                    }
411                    if (!left()) {
412                        throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
413                    }
414                    fetch();
415                } // while
416
417            default:
418                throw new SyntaxException(ERR_UNDEFINED_GROUP_OPTION);
419            } // switch
420
421        } else {
422            if (isDontCaptureGroup(env.option)) {
423                fetchToken(); // goto group
424                node = parseSubExp(term);
425                returnCode = 1; /* group */
426                return node;
427            }
428            final EncloseNode en = new EncloseNode(); // node_new_enclose_memory
429            final int num = env.addMemEntry();
430            en.regNum = num;
431            node = en;
432        }
433
434        fetchToken();
435        final Node target = parseSubExp(term);
436
437        if (node.getType() == NodeType.ANCHOR) {
438            final AnchorNode an = (AnchorNode) node;
439            an.setTarget(target);
440        } else {
441            final EncloseNode en = (EncloseNode)node;
442            en.setTarget(target);
443            if (en.type == EncloseType.MEMORY) {
444                /* Don't move this to previous of parse_subexp() */
445                env.setMemNode(en.regNum, node);
446            }
447        }
448        returnCode = 0;
449        return node; // ??
450    }
451
452    private Node parseExp(final TokenType term) {
453        if (token.type == term)
454         {
455            return StringNode.EMPTY; // goto end_of_token
456        }
457
458        Node node = null;
459        boolean group = false;
460
461        switch(token.type) {
462        case ALT:
463        case EOT:
464            return StringNode.EMPTY; // end_of_token:, node_new_empty
465
466        case SUBEXP_OPEN:
467            node = parseEnclose(TokenType.SUBEXP_CLOSE);
468            if (returnCode == 1) {
469                group = true;
470            } else if (returnCode == 2) { /* option only */
471                final int prev = env.option;
472                final EncloseNode en = (EncloseNode)node;
473                env.option = en.option;
474                fetchToken();
475                final Node target = parseSubExp(term);
476                env.option = prev;
477                en.setTarget(target);
478                return node;
479            }
480            break;
481        case SUBEXP_CLOSE:
482            if (!syntax.allowUnmatchedCloseSubexp()) {
483                throw new SyntaxException(ERR_UNMATCHED_CLOSE_PARENTHESIS);
484            }
485            if (token.escaped) {
486                return parseExpTkRawByte(group); // goto tk_raw_byte
487            }
488            return parseExpTkByte(group); // goto tk_byte
489        case STRING:
490            return parseExpTkByte(group); // tk_byte:
491
492        case RAW_BYTE:
493            return parseExpTkRawByte(group); // tk_raw_byte:
494        case CODE_POINT:
495            final char[] buf = new char[] {(char)token.getCode()};
496            // #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG ... // setRaw() #else
497            node = new StringNode(buf, 0, 1);
498            break;
499
500        case CHAR_TYPE:
501            switch(token.getPropCType()) {
502            case CharacterType.D:
503            case CharacterType.S:
504            case CharacterType.W:
505                if (Config.NON_UNICODE_SDW) {
506                    final CClassNode cc = new CClassNode();
507                    cc.addCType(token.getPropCType(), false, env, this);
508                    if (token.getPropNot()) {
509                        cc.setNot();
510                    }
511                    node = cc;
512                }
513                break;
514
515            case CharacterType.SPACE:
516            case CharacterType.DIGIT:
517            case CharacterType.XDIGIT:
518                // #ifdef USE_SHARED_CCLASS_TABLE ... #endif
519                final CClassNode ccn = new CClassNode();
520                ccn.addCType(token.getPropCType(), false, env, this);
521                if (token.getPropNot()) {
522                    ccn.setNot();
523                }
524                node = ccn;
525                break;
526
527            default:
528                throw new InternalException(ERR_PARSER_BUG);
529
530            } // inner switch
531            break;
532
533        case CC_CC_OPEN:
534            final CClassNode cc = parseCharClass();
535            node = cc;
536            if (isIgnoreCase(env.option)) {
537                final ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc);
538                EncodingHelper.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
539
540                if (arg.altRoot != null) {
541                    node = ConsAltNode.newAltNode(node, arg.altRoot);
542                }
543            }
544            break;
545
546        case ANYCHAR:
547            node = new AnyCharNode();
548            break;
549
550        case ANYCHAR_ANYTIME:
551            node = new AnyCharNode();
552            final QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
553            qn.setTarget(node);
554            node = qn;
555            break;
556
557        case BACKREF:
558            final int backRef = token.getBackrefRef();
559            node = new BackRefNode(backRef, env);
560            break;
561
562        case ANCHOR:
563            node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma
564            break;
565
566        case OP_REPEAT:
567        case INTERVAL:
568            if (syntax.contextIndepRepeatOps()) {
569                if (syntax.contextInvalidRepeatOps()) {
570                    throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED);
571                }
572                node = StringNode.EMPTY; // node_new_empty
573            } else {
574                return parseExpTkByte(group); // goto tk_byte
575            }
576            break;
577
578        default:
579            throw new InternalException(ERR_PARSER_BUG);
580        } //switch
581
582        //targetp = node;
583
584        fetchToken(); // re_entry:
585
586        return parseExpRepeat(node, group); // repeat:
587    }
588
589    private Node parseExpTkByte(final boolean group) {
590        final StringNode node = new StringNode(chars, token.backP, p); // tk_byte:
591        while (true) {
592            fetchToken();
593            if (token.type != TokenType.STRING) {
594                break;
595            }
596
597            if (token.backP == node.end) {
598                node.end = p; // non escaped character, remain shared, just increase shared range
599            } else {
600                node.cat(chars, token.backP, p); // non continuous string stream, need to COW
601            }
602        }
603        // targetp = node;
604        return parseExpRepeat(node, group); // string_end:, goto repeat
605    }
606
607    private Node parseExpTkRawByte(final boolean group) {
608        // tk_raw_byte:
609
610        // important: we don't use 0xff mask here neither in the compiler
611        // (in the template string) so we won't have to mask target
612        // strings when comparing against them in the matcher
613        final StringNode node = new StringNode((char)token.getC());
614        node.setRaw();
615
616        fetchToken();
617        node.clearRaw();
618        // !goto string_end;!
619        return parseExpRepeat(node, group);
620    }
621
622    private Node parseExpRepeat(final Node targetp, final boolean group) {
623        Node target = targetp;
624        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
625            if (target.isInvalidQuantifier()) {
626                throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
627            }
628
629            final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
630                                                     token.getRepeatUpper(),
631                                                     token.type == TokenType.INTERVAL);
632
633            qtfr.greedy = token.getRepeatGreedy();
634            final int ret = qtfr.setQuantifier(target, group, env, chars, getBegin(), getEnd());
635            Node qn = qtfr;
636
637            if (token.getRepeatPossessive()) {
638                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
639                en.setTarget(qn);
640                qn = en;
641            }
642
643            if (ret == 0) {
644                target = qn;
645            } else if (ret == 2) { /* split case: /abc+/ */
646                target = ConsAltNode.newListNode(target, null);
647                final ConsAltNode tmp = ((ConsAltNode)target).setCdr(ConsAltNode.newListNode(qn, null));
648
649                fetchToken();
650                return parseExpRepeatForCar(target, tmp, group);
651            }
652            fetchToken(); // goto re_entry
653        }
654        return target;
655    }
656
657    private Node parseExpRepeatForCar(final Node top, final ConsAltNode target, final boolean group) {
658        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
659            if (target.car.isInvalidQuantifier()) {
660                throw new SyntaxException(ERR_TARGET_OF_REPEAT_OPERATOR_INVALID);
661            }
662
663            final QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
664                                                     token.getRepeatUpper(),
665                                                     token.type == TokenType.INTERVAL);
666
667            qtfr.greedy = token.getRepeatGreedy();
668            final int ret = qtfr.setQuantifier(target.car, group, env, chars, getBegin(), getEnd());
669            Node qn = qtfr;
670
671            if (token.getRepeatPossessive()) {
672                final EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
673                en.setTarget(qn);
674                qn = en;
675            }
676
677            if (ret == 0) {
678                target.setCar(qn);
679            } else if (ret == 2) { /* split case: /abc+/ */
680                assert false;
681            }
682            fetchToken(); // goto re_entry
683        }
684        return top;
685    }
686
687    private Node parseBranch(final TokenType term) {
688        Node node = parseExp(term);
689
690        if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
691            return node;
692        }
693        final ConsAltNode top = ConsAltNode.newListNode(node, null);
694        ConsAltNode t = top;
695
696        while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
697            node = parseExp(term);
698            if (node.getType() == NodeType.LIST) {
699                t.setCdr((ConsAltNode)node);
700                while (((ConsAltNode)node).cdr != null ) {
701                    node = ((ConsAltNode)node).cdr;
702                }
703
704                t = ((ConsAltNode)node);
705            } else {
706                t.setCdr(ConsAltNode.newListNode(node, null));
707                t = t.cdr;
708            }
709        }
710        return top;
711    }
712
713    /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
714    private Node parseSubExp(final TokenType term) {
715        Node node = parseBranch(term);
716
717        if (token.type == term) {
718            return node;
719        } else if (token.type == TokenType.ALT) {
720            final ConsAltNode top = ConsAltNode.newAltNode(node, null);
721            ConsAltNode t = top;
722            while (token.type == TokenType.ALT) {
723                fetchToken();
724                node = parseBranch(term);
725
726                t.setCdr(ConsAltNode.newAltNode(node, null));
727                t = t.cdr;
728            }
729
730            if (token.type != term) {
731                parseSubExpError(term);
732            }
733            return top;
734        } else {
735            parseSubExpError(term);
736            return null; //not reached
737        }
738    }
739
740    private static void parseSubExpError(final TokenType term) {
741        if (term == TokenType.SUBEXP_CLOSE) {
742            throw new SyntaxException(ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
743        }
744        throw new InternalException(ERR_PARSER_BUG);
745    }
746
747    private Node parseRegexp() {
748        fetchToken();
749        return parseSubExp(TokenType.EOT);
750    }
751}
752