// Lexer.java revision 1088:7e62d98d4625
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import static jdk.nashorn.internal.runtime.regexp.joni.Option.isSingleline;
23import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
24import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
25import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
26import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar;
27import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
28import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
29import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
30import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
31import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
32
33class Lexer extends ScannerSupport {
34    protected final ScanEnvironment env;
35    protected final Syntax syntax;              // fast access to syntax
36    protected final Token token = new Token();  // current token
37
38    protected Lexer(final ScanEnvironment env, final char[] chars, final int p, final int end) {
39        super(chars, p, end);
40        this.env = env;
41        this.syntax = env.syntax;
42    }
43
44    /**
45     * @return 0: normal {n,m}, 2: fixed {n}
46     * !introduce returnCode here
47     */
48    private int fetchRangeQuantifier() {
49        mark();
50        final boolean synAllow = syntax.allowInvalidInterval();
51
52        if (!left()) {
53            if (synAllow) {
54                return 1; /* "....{" : OK! */
55            }
56            throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
57        }
58
59        if (!synAllow) {
60            c = peek();
61            if (c == ')' || c == '(' || c == '|') {
62                throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
63            }
64        }
65
66        int low = scanUnsignedNumber();
67        if (low < 0) {
68            throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
69        }
70        if (low > Config.MAX_REPEAT_NUM) {
71            throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
72        }
73
74        boolean nonLow = false;
75        if (p == _p) { /* can't read low */
76            if (syntax.allowIntervalLowAbbrev()) {
77                low = 0;
78                nonLow = true;
79            } else {
80                return invalidRangeQuantifier(synAllow);
81            }
82        }
83
84        if (!left()) {
85            return invalidRangeQuantifier(synAllow);
86        }
87
88        fetch();
89        int up;
90        int ret = 0;
91        if (c == ',') {
92            final int prev = p; // ??? last
93            up = scanUnsignedNumber();
94            if (up < 0) {
95                throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
96            }
97            if (up > Config.MAX_REPEAT_NUM) {
98                throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
99            }
100
101            if (p == prev) {
102                if (nonLow) {
103                    return invalidRangeQuantifier(synAllow);
104                }
105                up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
106            }
107        } else {
108            if (nonLow) {
109                return invalidRangeQuantifier(synAllow);
110            }
111            unfetch();
112            up = low; /* {n} : exact n times */
113            ret = 2; /* fixed */
114        }
115
116        if (!left()) {
117            return invalidRangeQuantifier(synAllow);
118        }
119        fetch();
120
121        if (syntax.opEscBraceInterval()) {
122            if (c != syntax.metaCharTable.esc) {
123                return invalidRangeQuantifier(synAllow);
124            }
125            fetch();
126        }
127
128        if (c != '}') {
129            return invalidRangeQuantifier(synAllow);
130        }
131
132        if (!isRepeatInfinite(up) && low > up) {
133            throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
134        }
135
136        token.type = TokenType.INTERVAL;
137        token.setRepeatLower(low);
138        token.setRepeatUpper(up);
139
140        return ret; /* 0: normal {n,m}, 2: fixed {n} */
141    }
142
143    private int invalidRangeQuantifier(final boolean synAllow) {
144        if (synAllow) {
145            restore();
146            return 1;
147        }
148        throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
149    }
150
151    @SuppressWarnings("fallthrough")
152    /* \M-, \C-, \c, or \... */
153    private int fetchEscapedValue() {
154        if (!left()) {
155            throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
156        }
157        fetch();
158
159        switch(c) {
160
161        case 'M':
162            if (syntax.op2EscCapitalMBarMeta()) {
163                if (!left()) {
164                    throw new SyntaxException(ERR_END_PATTERN_AT_META);
165                }
166                fetch();
167                if (c != '-') {
168                    throw new SyntaxException(ERR_META_CODE_SYNTAX);
169                }
170                if (!left()) {
171                    throw new SyntaxException(ERR_END_PATTERN_AT_META);
172                }
173                fetch();
174                if (c == syntax.metaCharTable.esc) {
175                    c = fetchEscapedValue();
176                }
177                c = ((c & 0xff) | 0x80);
178            } else {
179                fetchEscapedValueBackSlash();
180            }
181            break;
182
183        case 'C':
184            if (syntax.op2EscCapitalCBarControl()) {
185                if (!left()) {
186                    throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
187                }
188                fetch();
189                if (c != '-') {
190                    throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX);
191                }
192                fetchEscapedValueControl();
193            } else {
194                fetchEscapedValueBackSlash();
195            }
196            break;
197
198        case 'c':
199            if (syntax.opEscCControl()) {
200                fetchEscapedValueControl();
201            }
202            /* fall through */
203
204        default:
205            fetchEscapedValueBackSlash();
206        } // switch
207
208        return c; // ???
209    }
210
211    private void fetchEscapedValueBackSlash() {
212        c = env.convertBackslashValue(c);
213    }
214
215    private void fetchEscapedValueControl() {
216        if (!left()) {
217            throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
218        }
219        fetch();
220        if (c == '?') {
221            c = 0177;
222        } else {
223            if (c == syntax.metaCharTable.esc) {
224                c = fetchEscapedValue();
225            }
226            c &= 0x9f;
227        }
228    }
229
230    private void fetchTokenInCCFor_charType(final boolean flag, final int type) {
231        token.type = TokenType.CHAR_TYPE;
232        token.setPropCType(type);
233        token.setPropNot(flag);
234    }
235
236    private void fetchTokenInCCFor_x() {
237        if (!left()) {
238            return;
239        }
240        final int last = p;
241
242        if (peekIs('{') && syntax.opEscXBraceHex8()) {
243            inc();
244            final int num = scanUnsignedHexadecimalNumber(8);
245            if (num < 0) {
246                throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
247            }
248            if (left()) {
249                final int c2 = peek();
250                if (EncodingHelper.isXDigit(c2)) {
251                    throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
252                }
253            }
254
255            if (p > last + 1 && left() && peekIs('}')) {
256                inc();
257                token.type = TokenType.CODE_POINT;
258                token.setCode(num);
259            } else {
260                /* can't read nothing or invalid format */
261                p = last;
262            }
263        } else if (syntax.opEscXHex2()) {
264            int num = scanUnsignedHexadecimalNumber(2);
265            if (num < 0) {
266                throw new ValueException(ERR_TOO_BIG_NUMBER);
267            }
268            if (p == last) { /* can't read nothing. */
269                num = 0; /* but, it's not error */
270            }
271            token.type = TokenType.RAW_BYTE;
272            token.setC(num);
273        }
274    }
275
276    private void fetchTokenInCCFor_u() {
277        if (!left()) {
278            return;
279        }
280        final int last = p;
281
282        if (syntax.op2EscUHex4()) {
283            int num = scanUnsignedHexadecimalNumber(4);
284            if (num < 0) {
285                throw new ValueException(ERR_TOO_BIG_NUMBER);
286            }
287            if (p == last) {  /* can't read nothing. */
288                num = 0; /* but, it's not error */
289            }
290            token.type = TokenType.CODE_POINT;
291            token.setCode(num);
292        }
293    }
294
295    private void fetchTokenInCCFor_digit() {
296        if (syntax.opEscOctal3()) {
297            unfetch();
298            final int last = p;
299            int num = scanUnsignedOctalNumber(3);
300            if (num < 0) {
301                throw new ValueException(ERR_TOO_BIG_NUMBER);
302            }
303            if (p == last) {  /* can't read nothing. */
304                num = 0; /* but, it's not error */
305            }
306            token.type = TokenType.RAW_BYTE;
307            token.setC(num);
308        }
309    }
310
311    private void fetchTokenInCCFor_and() {
312        if (syntax.op2CClassSetOp() && left() && peekIs('&')) {
313            inc();
314            token.type = TokenType.CC_AND;
315        }
316    }
317
318    protected final TokenType fetchTokenInCC() {
319        if (!left()) {
320            token.type = TokenType.EOT;
321            return token.type;
322        }
323
324        fetch();
325        token.type = TokenType.CHAR;
326        token.setC(c);
327        token.escaped = false;
328
329        if (c == ']') {
330            token.type = TokenType.CC_CLOSE;
331        } else if (c == '-') {
332            token.type = TokenType.CC_RANGE;
333        } else if (c == syntax.metaCharTable.esc) {
334            if (!syntax.backSlashEscapeInCC()) {
335                return token.type;
336            }
337            if (!left()) {
338                throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
339            }
340            fetch();
341            token.escaped = true;
342            token.setC(c);
343
344            switch (c) {
345            case 'w':
346                fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
347                break;
348            case 'W':
349                fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
350                break;
351            case 'd':
352                fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
353                break;
354            case 'D':
355                fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
356                break;
357            case 's':
358                fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
359                break;
360            case 'S':
361                fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
362                break;
363            case 'h':
364                if (syntax.op2EscHXDigit()) {
365                    fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
366                }
367                break;
368            case 'H':
369                if (syntax.op2EscHXDigit()) {
370                    fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
371                }
372                break;
373            case 'x':
374                fetchTokenInCCFor_x();
375                break;
376            case 'u':
377                fetchTokenInCCFor_u();
378                break;
379            case '0':
380            case '1':
381            case '2':
382            case '3':
383            case '4':
384            case '5':
385            case '6':
386            case '7':
387                fetchTokenInCCFor_digit();
388                break;
389
390            default:
391                unfetch();
392                final int num = fetchEscapedValue();
393                if (token.getC() != num) {
394                    token.setCode(num);
395                    token.type = TokenType.CODE_POINT;
396                }
397                break;
398            } // switch
399
400        } else if (c == '&') {
401            fetchTokenInCCFor_and();
402        }
403        return token.type;
404    }
405
406    private void fetchTokenFor_repeat(final int lower, final int upper) {
407        token.type = TokenType.OP_REPEAT;
408        token.setRepeatLower(lower);
409        token.setRepeatUpper(upper);
410        greedyCheck();
411    }
412
413    private void fetchTokenFor_openBrace() {
414        switch (fetchRangeQuantifier()) {
415        case 0:
416            greedyCheck();
417            break;
418        case 2:
419            if (syntax.fixedIntervalIsGreedyOnly()) {
420                possessiveCheck();
421            } else {
422                greedyCheck();
423            }
424            break;
425        default: /* 1 : normal char */
426        } // inner switch
427    }
428
429    private void fetchTokenFor_anchor(final int subType) {
430        token.type = TokenType.ANCHOR;
431        token.setAnchor(subType);
432    }
433
434    private void fetchTokenFor_xBrace() {
435        if (!left()) {
436            return;
437        }
438
439        final int last = p;
440        if (peekIs('{') && syntax.opEscXBraceHex8()) {
441            inc();
442            final int num = scanUnsignedHexadecimalNumber(8);
443            if (num < 0) {
444                throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
445            }
446            if (left()) {
447                if (EncodingHelper.isXDigit(peek())) {
448                    throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
449                }
450            }
451
452            if (p > last + 1 && left() && peekIs('}')) {
453                inc();
454                token.type = TokenType.CODE_POINT;
455                token.setCode(num);
456            } else {
457                /* can't read nothing or invalid format */
458                p = last;
459            }
460        } else if (syntax.opEscXHex2()) {
461            int num = scanUnsignedHexadecimalNumber(2);
462            if (num < 0) {
463                throw new ValueException(ERR_TOO_BIG_NUMBER);
464            }
465            if (p == last) { /* can't read nothing. */
466                num = 0; /* but, it's not error */
467            }
468            token.type = TokenType.RAW_BYTE;
469            token.setC(num);
470        }
471    }
472
473    private void fetchTokenFor_uHex() {
474        if (!left()) {
475            return;
476        }
477        final int last = p;
478
479        if (syntax.op2EscUHex4()) {
480            int num = scanUnsignedHexadecimalNumber(4);
481            if (num < 0) {
482                throw new ValueException(ERR_TOO_BIG_NUMBER);
483            }
484            if (p == last) { /* can't read nothing. */
485                num = 0; /* but, it's not error */
486            }
487            token.type = TokenType.CODE_POINT;
488            token.setCode(num);
489        }
490    }
491
492    private void fetchTokenFor_digit() {
493        unfetch();
494        final int last = p;
495        final int num = scanUnsignedNumber();
496        if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
497        } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
498            if (syntax.strictCheckBackref()) {
499                if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) {
500                    throw new ValueException(ERR_INVALID_BACKREF);
501                }
502            }
503            token.type = TokenType.BACKREF;
504            token.setBackrefRef(num);
505            return;
506        }
507
508        if (c == '8' || c == '9') { /* normal char */ // skip_backref:
509            p = last;
510            inc();
511            return;
512        }
513        p = last;
514
515        fetchTokenFor_zero(); /* fall through */
516    }
517
518    private void fetchTokenFor_zero() {
519        if (syntax.opEscOctal3()) {
520            final int last = p;
521            int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
522            if (num < 0) {
523                throw new ValueException(ERR_TOO_BIG_NUMBER);
524            }
525            if (p == last) { /* can't read nothing. */
526                num = 0; /* but, it's not error */
527            }
528            token.type = TokenType.RAW_BYTE;
529            token.setC(num);
530        } else if (c != '0') {
531            inc();
532        }
533    }
534
535    private void fetchTokenFor_metaChars() {
536        if (c == syntax.metaCharTable.anyChar) {
537            token.type = TokenType.ANYCHAR;
538        } else if (c == syntax.metaCharTable.anyTime) {
539            fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
540        }  else if (c == syntax.metaCharTable.zeroOrOneTime) {
541            fetchTokenFor_repeat(0, 1);
542        } else if (c == syntax.metaCharTable.oneOrMoreTime) {
543            fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
544        } else if (c == syntax.metaCharTable.anyCharAnyTime) {
545            token.type = TokenType.ANYCHAR_ANYTIME;
546            // goto out
547        }
548    }
549
550    protected final TokenType fetchToken() {
551        // mark(); // out
552        start:
553        while(true) {
554            if (!left()) {
555                token.type = TokenType.EOT;
556                return token.type;
557            }
558
559            token.type = TokenType.STRING;
560            token.backP = p;
561
562            fetch();
563
564            if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
565                if (!left()) {
566                    throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
567                }
568
569                token.backP = p;
570                fetch();
571
572                token.setC(c);
573                token.escaped = true;
574                switch(c) {
575
576                case '*':
577                    if (syntax.opEscAsteriskZeroInf()) {
578                        fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
579                    }
580                    break;
581                case '+':
582                    if (syntax.opEscPlusOneInf()) {
583                        fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
584                    }
585                    break;
586                case '?':
587                    if (syntax.opEscQMarkZeroOne()) {
588                        fetchTokenFor_repeat(0, 1);
589                    }
590                    break;
591                case '{':
592                    if (syntax.opEscBraceInterval()) {
593                        fetchTokenFor_openBrace();
594                    }
595                    break;
596                case '|':
597                    if (syntax.opEscVBarAlt()) {
598                        token.type = TokenType.ALT;
599                    }
600                    break;
601                case '(':
602                    if (syntax.opEscLParenSubexp()) {
603                        token.type = TokenType.SUBEXP_OPEN;
604                    }
605                    break;
606                case ')':
607                    if (syntax.opEscLParenSubexp()) {
608                        token.type = TokenType.SUBEXP_CLOSE;
609                    }
610                    break;
611                case 'w':
612                    if (syntax.opEscWWord()) {
613                        fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
614                    }
615                    break;
616                case 'W':
617                    if (syntax.opEscWWord()) {
618                        fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
619                    }
620                    break;
621                case 'b':
622                    if (syntax.opEscBWordBound()) {
623                        fetchTokenFor_anchor(AnchorType.WORD_BOUND);
624                    }
625                    break;
626                case 'B':
627                    if (syntax.opEscBWordBound()) {
628                        fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
629                    }
630                    break;
631                case '<':
632                    if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
633                        fetchTokenFor_anchor(AnchorType.WORD_BEGIN);
634                    }
635                    break;
636                case '>':
637                    if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
638                        fetchTokenFor_anchor(AnchorType.WORD_END);
639                    }
640                    break;
641                case 's':
642                    if (syntax.opEscSWhiteSpace()) {
643                        fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
644                    }
645                    break;
646                case 'S':
647                    if (syntax.opEscSWhiteSpace()) {
648                        fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
649                    }
650                    break;
651                case 'd':
652                    if (syntax.opEscDDigit()) {
653                        fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
654                    }
655                    break;
656                case 'D':
657                    if (syntax.opEscDDigit()) {
658                        fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
659                    }
660                    break;
661                case 'h':
662                    if (syntax.op2EscHXDigit()) {
663                        fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
664                    }
665                    break;
666                case 'H':
667                    if (syntax.op2EscHXDigit()) {
668                        fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
669                    }
670                    break;
671                case 'A':
672                    if (syntax.opEscAZBufAnchor()) {
673                        fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
674                    }
675                    break;
676                case 'Z':
677                    if (syntax.opEscAZBufAnchor()) {
678                        fetchTokenFor_anchor(AnchorType.SEMI_END_BUF);
679                    }
680                    break;
681                case 'z':
682                    if (syntax.opEscAZBufAnchor()) {
683                        fetchTokenFor_anchor(AnchorType.END_BUF);
684                    }
685                    break;
686                case 'G':
687                    if (syntax.opEscCapitalGBeginAnchor()) {
688                        fetchTokenFor_anchor(AnchorType.BEGIN_POSITION);
689                    }
690                    break;
691                case '`':
692                    if (syntax.op2EscGnuBufAnchor()) {
693                        fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
694                    }
695                    break;
696                case '\'':
697                    if (syntax.op2EscGnuBufAnchor()) {
698                        fetchTokenFor_anchor(AnchorType.END_BUF);
699                    }
700                    break;
701                case 'x':
702                    fetchTokenFor_xBrace();
703                    break;
704                case 'u':
705                    fetchTokenFor_uHex();
706                    break;
707                case '1':
708                case '2':
709                case '3':
710                case '4':
711                case '5':
712                case '6':
713                case '7':
714                case '8':
715                case '9':
716                    fetchTokenFor_digit();
717                    break;
718                case '0':
719                    fetchTokenFor_zero();
720                    break;
721
722                default:
723                    unfetch();
724                    final int num = fetchEscapedValue();
725
726                    /* set_raw: */
727                    if (token.getC() != num) {
728                        token.type = TokenType.CODE_POINT;
729                        token.setCode(num);
730                    } else { /* string */
731                        p = token.backP + 1;
732                    }
733                    break;
734
735                } // switch (c)
736
737            } else {
738                token.setC(c);
739                token.escaped = false;
740
741                if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) {
742                    fetchTokenFor_metaChars();
743                    break;
744                }
745
746                {
747                    switch(c) {
748                    case '.':
749                        if (syntax.opDotAnyChar()) {
750                            token.type = TokenType.ANYCHAR;
751                        }
752                        break;
753                    case '*':
754                        if (syntax.opAsteriskZeroInf()) {
755                            fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
756                        }
757                        break;
758                    case '+':
759                        if (syntax.opPlusOneInf()) {
760                            fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
761                        }
762                        break;
763                    case '?':
764                        if (syntax.opQMarkZeroOne()) {
765                            fetchTokenFor_repeat(0, 1);
766                        }
767                        break;
768                    case '{':
769                        if (syntax.opBraceInterval()) {
770                            fetchTokenFor_openBrace();
771                        }
772                        break;
773                    case '|':
774                        if (syntax.opVBarAlt()) {
775                            token.type = TokenType.ALT;
776                        }
777                        break;
778
779                    case '(':
780                        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
781                            inc();
782                            if (peekIs('#')) {
783                                fetch();
784                                while (true) {
785                                    if (!left()) {
786                                        throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
787                                    }
788                                    fetch();
789                                    if (c == syntax.metaCharTable.esc) {
790                                        if (left()) {
791                                            fetch();
792                                        }
793                                    } else {
794                                        if (c == ')') {
795                                            break;
796                                        }
797                                    }
798                                }
799                                continue start; // goto start
800                            }
801                            unfetch();
802                        }
803
804                        if (syntax.opLParenSubexp()) {
805                            token.type = TokenType.SUBEXP_OPEN;
806                        }
807                        break;
808                    case ')':
809                        if (syntax.opLParenSubexp()) {
810                            token.type = TokenType.SUBEXP_CLOSE;
811                        }
812                        break;
813                    case '^':
814                        if (syntax.opLineAnchor()) {
815                            fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
816                        }
817                        break;
818                    case '$':
819                        if (syntax.opLineAnchor()) {
820                            fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
821                        }
822                        break;
823                    case '[':
824                        if (syntax.opBracketCC()) {
825                            token.type = TokenType.CC_CC_OPEN;
826                        }
827                        break;
828                    case ']':
829                        //if (*src > env->pattern)   /* /].../ is allowed. */
830                        //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
831                        break;
832                    case '#':
833                        if (Option.isExtend(env.option)) {
834                            while (left()) {
835                                fetch();
836                                if (EncodingHelper.isNewLine(c)) {
837                                    break;
838                                }
839                            }
840                            continue start; // goto start
841                        }
842                        break;
843
844                    case ' ':
845                    case '\t':
846                    case '\n':
847                    case '\r':
848                    case '\f':
849                        if (Option.isExtend(env.option))
850                         {
851                            continue start; // goto start
852                        }
853                        break;
854
855                    default: // string
856                        break;
857
858                    } // switch
859                }
860            }
861
862            break;
863        } // while
864        return token.type;
865    }
866
867    private void greedyCheck() {
868        if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) {
869
870            fetch();
871
872            token.setRepeatGreedy(false);
873            token.setRepeatPossessive(false);
874        } else {
875            possessiveCheck();
876        }
877    }
878
879    private void possessiveCheck() {
880        if (left() && peekIs('+') &&
881            (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL ||
882             syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) {
883
884            fetch();
885
886            token.setRepeatGreedy(true);
887            token.setRepeatPossessive(true);
888        } else {
889            token.setRepeatGreedy(true);
890            token.setRepeatPossessive(false);
891        }
892    }
893
894    protected final void syntaxWarn(final String message, final char ch) {
895        syntaxWarn(message.replace("<%n>", Character.toString(ch)));
896    }
897
898    protected final void syntaxWarn(final String message) {
899        if (Config.USE_WARN) {
900            env.reg.warnings.warn(message + ": /" + new String(chars, getBegin(), getEnd()) + "/");
901        }
902    }
903}
904