1/*
2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * Generate js file as follows:
33re2c -isc Source/WebCore/inspector/front-end/SourceJavaScriptTokenizer.re2js \
34  | sed 's|^yy\([^:]*\)*\:|case \1:|' \
35  | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
  | sed 's|[*][+][+]cursor|this._charAt(++cursor)|' \
37  | sed 's|[*]cursor|this._charAt(cursor)|' \
38  | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
39  | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
40  | sed 's|yych <= \(0x[0-9a-fA-F]*\)|yych \<\= String.fromCharCode(\1)|' \
41  | sed 's|unsigned\ int|var|' \
42  | sed 's|var\ yych|case 1: var yych|' > Source/WebCore/inspector/front-end/SourceJavaScriptTokenizer.js
43 */
44
/**
 * Tokenizer for JavaScript source text. The scanning logic lives in
 * nextToken(), whose body is a state machine generated by re2c; this
 * constructor only sets up the tables that generated code refers to.
 *
 * @constructor
 * @extends {WebInspector.SourceTokenizer}
 */
WebInspector.SourceJavaScriptTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer conditions referenced by the re2c rules:
    //   DIV     - "/" is matched as a division operator
    //   NODIV   - "/" is tried as the start of a regular expression literal
    //   COMMENT - inside a block comment that did not end on its first line
    //   DSTRING - inside a double-quoted string continued across a line break
    //   SSTRING - inside a single-quoted string continued across a line break
    //   REGEX   - inside a regular expression continued after a backslash
    var lexConditions = {
        DIV: 0,
        NODIV: 1,
        COMMENT: 2,
        DSTRING: 3,
        SSTRING: 4,
        REGEX: 5
    };
    this._lexConditions = lexConditions;

    // Switch labels for the per-condition entry points of the generated
    // state machine (see re2c:condprefix): 1000 plus the condition's index.
    var conditionCaseBase = 1000;
    this.case_DIV = conditionCaseBase + lexConditions.DIV;
    this.case_NODIV = conditionCaseBase + lexConditions.NODIV;
    this.case_COMMENT = conditionCaseBase + lexConditions.COMMENT;
    this.case_DSTRING = conditionCaseBase + lexConditions.DSTRING;
    this.case_SSTRING = conditionCaseBase + lexConditions.SSTRING;
    this.case_REGEX = conditionCaseBase + lexConditions.REGEX;

    // Must come last: createInitialCondition() reads this._lexConditions.
    this.condition = this.createInitialCondition();
}
71
/**
 * Words that nextToken() reports as "javascript-keyword" instead of
 * "javascript-ident". nextToken() tests Keywords[token] === true, so
 * keySet() presumably maps every entry to true (helper defined elsewhere;
 * confirm there).
 */
WebInspector.SourceJavaScriptTokenizer.Keywords = [
    "null", "true", "false",
    "break", "case", "catch", "const", "default", "finally", "for",
    "instanceof", "new", "var", "continue", "function", "return", "void",
    "delete", "if", "this", "do", "while", "else", "in", "switch", "throw",
    "try", "typeof", "debugger",
    "class", "enum", "export", "extends", "import", "super",
    "get", "set", "with"
].keySet();
78
/**
 * Maps the value properties of the global object to the token type that
 * nextToken() assigns when an identifier is exactly one of these names.
 * This lookup takes precedence over the keyword check.
 */
WebInspector.SourceJavaScriptTokenizer.GlobalObjectValueProperties = {
    "NaN":       "javascript-nan",
    "undefined": "javascript-undef",
    "Infinity":  "javascript-inf"
};
84
85WebInspector.SourceJavaScriptTokenizer.prototype = {
86    createInitialCondition: function()
87    {
88        return { lexCondition: this._lexConditions.NODIV };
89    },
90
91    nextToken: function(cursor)
92    {
93        var cursorOnEnter = cursor;
94        var gotoCase = 1;
95        var YYMARKER;
96        while (1) {
97            switch (gotoCase)
98            // Following comment is replaced with generated state machine.
99            /*!re2c
100                re2c:define:YYCTYPE  = "var";
101                re2c:define:YYCURSOR = cursor;
102                re2c:define:YYGETCONDITION = "this.getLexCondition";
103                re2c:define:YYSETCONDITION = "this.setLexCondition";
104                re2c:condprefix = "case this.case_";
105                re2c:condenumprefix = "this._lexConditions.";
106                re2c:yyfill:enable = 0;
107                re2c:labelprefix = "case ";
108                re2c:indent:top = 2;
109                re2c:indent:string = "    ";
110
111                LineComment = "//" [^\r\n]*;
112                CommentContent = ([^*\r\n] | ("*"+[^/*]))*;
113                Comment = "/*" CommentContent "*"+ "/";
114                CommentStart = "/*" CommentContent [\r\n];
115                CommentEnd = CommentContent "*"+ "/";
116
117                DecimalDigit = [0-9];
118                NonZeroDigit = [1-9];
119                OctalDigit = [0-7];
120                HexDigit = [0-9a-fA-F];
121                SignedInteger = ("+"|"-")? DecimalDigit+;
122                ExponentPart = ("e" | "E") SignedInteger;
123                DecimalIntegerLiteral = "0" | NonZeroDigit DecimalDigit*;
124                DecimalLiteral = DecimalIntegerLiteral "." DecimalDigit* ExponentPart? | "." DecimalDigit+ ExponentPart? | DecimalIntegerLiteral ExponentPart?;
125                HexIntegerLiteral = "0" ("x"|"X") HexDigit+;
126                OctalIntegerLiteral = "0" OctalDigit+;
127                NumericLiteral = DecimalLiteral | HexIntegerLiteral | OctalIntegerLiteral;
128
129                Punctuation = [\!\%\&\(\*\+\,\-\.\:\;\<\=\>\?\[\]\^\{\|\}\~] | "!=" | "!==" | "%=" | "&&" | "&=" | "*=" | "++" | "+=" | "--" | "-=" | "<<" | "<<="  | "<=" | "==" | "===" | ">=" | ">>" | ">>=" | ">>>" | ">>>=" | "^=" | "|=" | "||";
130                Division = "/" | "/=";
131                RightParen = ")";
132
133                Letter = [a-zA-Z\x80-\xFF];
134                UnicodeEscapeSequence = "\\u" HexDigit HexDigit HexDigit HexDigit;
135
136                IdentifierStart = Letter | "_" | "$" | UnicodeEscapeSequence;
137                IdentifierPart = IdentifierStart | DecimalDigit;
138                Identifier = IdentifierStart IdentifierPart *;
139                Spaces = " "+;
140
141                DoubleStringContent = ([^\r\n\"\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*;
142                SingleStringContent = ([^\r\n\'\\] | UnicodeEscapeSequence | "\\" ['"\\bfnrtv])*;
143                StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
144                DoubleStringStart = "\"" DoubleStringContent "\\" [\r\n];
145                DoubleStringEnd = DoubleStringContent "\"";
146                SingleStringStart = "'" SingleStringContent "\\" [\r\n];
147                SingleStringEnd = SingleStringContent "'";
148
149                BackslashSequence = "\\" [^\r\n];
150                RegexSet = "[" ([^\r\n*\\/] | BackslashSequence)* "]";
151                RegexFirstChar = [^\r\n*\\/\[\]] | BackslashSequence | RegexSet;
152                RegexChar = [^\r\n\\/\[\]] | BackslashSequence | RegexSet;
153                RegexContent = RegexChar*;
154                Regex = "/" RegexFirstChar RegexContent "/" [igm]*;
155                RegexStart = "/" RegexFirstChar RegexContent "\\";
156                RegexEnd = RegexContent "/" [igm]*;
157
158                <DIV,NODIV> LineComment { this.tokenType = "javascript-comment"; return cursor; }
159                <DIV,NODIV> Comment { this.tokenType = "javascript-comment"; return cursor; }
160                <DIV,NODIV> CommentStart => COMMENT { this.tokenType = "javascript-comment"; return cursor; }
161                <COMMENT> CommentContent => COMMENT { this.tokenType = "javascript-comment"; return cursor; }
162                <COMMENT> CommentEnd => NODIV { this.tokenType = "javascript-comment"; return cursor; }
163
164                <DIV,NODIV> Spaces {this.tokenType = "whitespace"; return cursor; }
165                <DIV,NODIV> StringLiteral { this.tokenType = "javascript-string"; return cursor; }
166                <DIV,NODIV> DoubleStringStart => DSTRING { this.tokenType = "javascript-string"; return cursor; }
167                <DSTRING> DoubleStringContent => DSTRING { this.tokenType = "javascript-string"; return cursor; }
168                <DSTRING> DoubleStringEnd => NODIV { this.tokenType = "javascript-string"; return cursor; }
169                <DIV,NODIV> SingleStringStart => SSTRING { this.tokenType = "javascript-string"; return cursor; }
170                <SSTRING> SingleStringContent => SSTRING { this.tokenType = "javascript-string"; return cursor; }
171                <SSTRING> SingleStringEnd => NODIV { this.tokenType = "javascript-string"; return cursor; }
172
173                <NODIV> Regex { this.tokenType = "javascript-regexp"; return cursor; }
174                <NODIV> RegexStart => REGEX { this.tokenType = "javascript-regexp"; return cursor; }
175                <REGEX> RegexContent => REGEX { this.tokenType = "javascript-regexp"; return cursor; }
176                <REGEX> RegexEnd => NODIV { this.tokenType = "javascript-regexp"; return cursor; }
177
178                <DIV,NODIV> NumericLiteral => DIV { this.tokenType = "javascript-number"; return cursor; }
179                <DIV,NODIV> Identifier => DIV
180                {
181                    var token = this._line.substring(cursorOnEnter, cursor);
182                    if (WebInspector.SourceJavaScriptTokenizer.GlobalObjectValueProperties.hasOwnProperty(token))
183                        this.tokenType = WebInspector.SourceJavaScriptTokenizer.GlobalObjectValueProperties[token];
184                    else if (WebInspector.SourceJavaScriptTokenizer.Keywords[token] === true && token !== "__proto__")
185                        this.tokenType = "javascript-keyword";
186                    else
187                        this.tokenType = "javascript-ident";
188                    return cursor;
189                }
190                <DIV,NODIV> RightParen => DIV { this.tokenType = "brace-end"; return cursor; }
191                <DIV,NODIV> Punctuation => NODIV
192                {
193                    var token = this._line.charAt(cursorOnEnter);
194                    if (token === "{")
195                        this.tokenType = "block-start";
196                    else if (token === "}")
197                        this.tokenType = "block-end";
198                    else if (token === "(")
199                        this.tokenType = "brace-start";
200                    else this.tokenType = null;
201                    return cursor;
202                }
203                <DIV> Division => NODIV { this.tokenType = null; return cursor; }
204                <*> [^] { this.tokenType = null; return cursor; }
205            */
206        }
207    },
208
209    __proto__: WebInspector.SourceTokenizer.prototype
210}
211