1# Copyright (C) 2011 Apple Inc. All rights reserved. 2# Copyright (C) 2012 Sony Network Entertainment. All rights reserved. 3# 4# Redistribution and use in source and binary forms, with or without 5# modification, are permitted provided that the following conditions 6# are met: 7# 1. Redistributions of source code must retain the above copyright 8# notice, this list of conditions and the following disclaimer. 9# 2. Redistributions in binary form must reproduce the above copyright 10# notice, this list of conditions and the following disclaimer in the 11# documentation and/or other materials provided with the distribution. 12# 13# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 25import sys 26import string 27import operator 28 29keywordsText = open(sys.argv[1]).read() 30 31# A second argument signifies that the output 32# should be redirected to a file 33redirect_to_file = len(sys.argv) > 2 34 35# Change stdout to point to the file if requested 36if redirect_to_file: 37 file_output = open(sys.argv[-1], "w") 38 sys.stdout = file_output 39 40# Observed weights of the most common keywords, rounded to 2.s.d 41keyWordWeights = { 42 "catch": 0.01, 43 "try": 0.01, 44 "while": 0.01, 45 "case": 0.01, 46 "break": 0.01, 47 "new": 0.01, 48 "in": 0.01, 49 "typeof": 0.02, 50 "true": 0.02, 51 "false": 0.02, 52 "for": 0.03, 53 "null": 0.03, 54 "else": 0.03, 55 "return": 0.13, 56 "var": 0.13, 57 "if": 0.16, 58 "function": 0.18, 59 "this": 0.18, 60} 61 62 63def allWhitespace(str): 64 for c in str: 65 if not(c in string.whitespace): 66 return False 67 return True 68 69 70def parseKeywords(keywordsText): 71 72 if sys.platform == "cygwin": 73 keywordsText = keywordsText.replace("\r\n", "\n") 74 75 lines = keywordsText.split("\n") 76 lines = [line.split("#")[0] for line in lines] 77 lines = [line for line in lines if (not allWhitespace(line))] 78 name = lines[0].split() 79 terminator = lines[-1] 80 81 if not name[0] == "@begin": 82 raise Exception("expected description beginning with @begin") 83 if not terminator == "@end": 84 raise Exception("expected description ending with @end") 85 86 lines = lines[1:-1] # trim off the old heading 87 return [line.split() for line in lines] 88 89 90def makePadding(size): 91 str = "" 92 for i in range(size): 93 str = str + " " 94 return str 95 96 97class Trie: 98 def __init__(self, prefix): 99 self.prefix = prefix 100 self.keys = {} 101 self.value = None 102 103 def insert(self, key, value): 104 if len(key) == 0: 105 self.value = value 106 return 107 if not (key[0] in self.keys): 108 self.keys[key[0]] = Trie(key[0]) 109 self.keys[key[0]].insert(key[1:], value) 110 111 def coalesce(self): 112 keys = {} 113 for k, v in self.keys.items(): 114 t = v.coalesce() 115 keys[t.prefix] = t 116 self.keys = keys 117 if self.value != None: 118 return self 119 if len(self.keys) != 1: 120 return self 121 # Python 3: for() loop for compatibility. Use next() when Python 2.6 is the baseline. 122 for (prefix, suffix) in self.keys.items(): 123 res = Trie(self.prefix + prefix) 124 res.value = suffix.value 125 res.keys = suffix.keys 126 return res 127 128 def fillOut(self, prefix=""): 129 self.fullPrefix = prefix + self.prefix 130 weight = 0 131 if self.fullPrefix in keyWordWeights: 132 weight = weight + keyWordWeights[self.fullPrefix] 133 self.selfWeight = weight 134 for trie in self.keys.values(): 135 trie.fillOut(self.fullPrefix) 136 weight = weight + trie.weight 137 self.keys = [(trie.prefix, trie) for trie in sorted(self.keys.values(), key=operator.attrgetter('weight'), reverse=True)] 138 self.weight = weight 139 140 def printSubTreeAsC(self, typeName, indent): 141 str = makePadding(indent) 142 143 if self.value != None: 144 print(str + "if (!isIdentPart(code[%d])) {" % (len(self.fullPrefix))) 145 print(str + " internalShift<%d>();" % len(self.fullPrefix)) 146 print(str + " if (shouldCreateIdentifier)") 147 print(str + (" data->ident = &m_vm->propertyNames->%sKeyword;" % self.fullPrefix)) 148 print(str + " return " + self.value + ";") 149 print(str + "}") 150 rootIndex = len(self.fullPrefix) 151 itemCount = 0 152 for k, trie in self.keys: 153 baseIndex = rootIndex 154 if (baseIndex > 0) and (len(k) == 3): 155 baseIndex = baseIndex - 1 156 k = trie.fullPrefix[baseIndex] + k 157 test = [("'%s'" % c) for c in k] 158 if len(test) == 1: 159 comparison = "code[%d] == %s" % (baseIndex, test[0]) 160 else: 161 base = "code" 162 if baseIndex > 0: 163 base = "code + %d" % baseIndex 164 comparison = ("COMPARE_%d%sS(%s, " % (len(test), typeName, base)) + ", ".join(test) + ")" 165 if itemCount == 0: 166 print(str + "if (" + comparison + ") {") 167 else: 168 print(str + "} else if (" + comparison + ") {") 169 170 trie.printSubTreeAsC(typeName, indent + 4) 171 itemCount = itemCount + 1 172 173 if itemCount == len(self.keys): 174 print(str + "}") 175 176 def maxLength(self): 177 max = len(self.fullPrefix) 178 for (_, trie) in self.keys: 179 l = trie.maxLength() 180 if l > max: 181 max = l 182 return max 183 184 def printAsC(self): 185 print("namespace JSC {") 186 print("") 187 print("static ALWAYS_INLINE bool isIdentPart(LChar c);") 188 print("static ALWAYS_INLINE bool isIdentPart(UChar c);") 189 # max length + 1 so we don't need to do any bounds checking at all 190 print("static const int maxTokenLength = %d;" % (self.maxLength() + 1)) 191 print("") 192 print("template <>") 193 print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseKeyword(JSTokenData* data)") 194 print("{") 195 print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") 196 print("") 197 print(" const UChar* code = m_code;") 198 self.printSubTreeAsC("UCHAR", 4) 199 print(" return IDENT;") 200 print("}") 201 print("") 202 print("template <>") 203 print("template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseKeyword(JSTokenData* data)") 204 print("{") 205 print(" ASSERT(m_codeEnd - m_code >= maxTokenLength);") 206 print("") 207 print(" const LChar* code = m_code;") 208 self.printSubTreeAsC("CHAR", 4) 209 print(" return IDENT;") 210 print("}") 211 print("") 212 print("} // namespace JSC") 213 214keywords = parseKeywords(keywordsText) 215trie = Trie("") 216for k, v in keywords: 217 trie.insert(k, v) 218trie.coalesce() 219trie.fillOut() 220print("// This file was generated by KeywordLookupGenerator.py. Do not edit.") 221print(""" 222#if CPU(NEEDS_ALIGNED_ACCESS) 223 224#define COMPARE_2CHARS(address, char1, char2) \\ 225 (((address)[0] == char1) && ((address)[1] == char2)) 226#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ 227 (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) 228#define COMPARE_2UCHARS(address, char1, char2) \\ 229 (((address)[0] == char1) && ((address)[1] == char2)) 230#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ 231 (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) 232 233#else // CPU(NEEDS_ALIGNED_ACCESS) 234 235#if CPU(BIG_ENDIAN) 236 237#define CHARPAIR_TOUINT16(a, b) \\ 238 ((((uint16_t)(a)) << 8) + (uint16_t)(b)) 239#define CHARQUAD_TOUINT32(a, b, c, d) \\ 240 ((((uint32_t)(CHARPAIR_TOUINT16(a, b))) << 16) + CHARPAIR_TOUINT16(c, d)) 241#define UCHARPAIR_TOUINT32(a, b) \\ 242 ((((uint32_t)(a)) << 16) + (uint32_t)(b)) 243#define UCHARQUAD_TOUINT64(a, b, c, d) \\ 244 ((((uint64_t)(UCHARQUAD_TOUINT64(a, b))) << 32) + UCHARPAIR_TOUINT32(c, d)) 245 246#else // CPU(BIG_ENDIAN) 247 248#define CHARPAIR_TOUINT16(a, b) \\ 249 ((((uint16_t)(b)) << 8) + (uint16_t)(a)) 250#define CHARQUAD_TOUINT32(a, b, c, d) \\ 251 ((((uint32_t)(CHARPAIR_TOUINT16(c, d))) << 16) + CHARPAIR_TOUINT16(a, b)) 252#define UCHARPAIR_TOUINT32(a, b) \\ 253 ((((uint32_t)(b)) << 16) + (uint32_t)(a)) 254#define UCHARQUAD_TOUINT64(a, b, c, d) \\ 255 ((((uint64_t)(UCHARPAIR_TOUINT32(c, d))) << 32) + UCHARPAIR_TOUINT32(a, b)) 256 257#endif // CPU(BIG_ENDIAN) 258 259 260#define COMPARE_2CHARS(address, char1, char2) \\ 261 (((uint16_t*)(address))[0] == CHARPAIR_TOUINT16(char1, char2)) 262#define COMPARE_2UCHARS(address, char1, char2) \\ 263 (((uint32_t*)(address))[0] == UCHARPAIR_TOUINT32(char1, char2)) 264 265#if CPU(X86_64) 266 267#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ 268 (((uint32_t*)(address))[0] == CHARQUAD_TOUINT32(char1, char2, char3, char4)) 269#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ 270 (((uint64_t*)(address))[0] == UCHARQUAD_TOUINT64(char1, char2, char3, char4)) 271 272#else // CPU(X86_64) 273 274#define COMPARE_4CHARS(address, char1, char2, char3, char4) \\ 275 (COMPARE_2CHARS(address, char1, char2) && COMPARE_2CHARS((address) + 2, char3, char4)) 276#define COMPARE_4UCHARS(address, char1, char2, char3, char4) \\ 277 (COMPARE_2UCHARS(address, char1, char2) && COMPARE_2UCHARS((address) + 2, char3, char4)) 278 279#endif // CPU(X86_64) 280 281#endif // CPU(NEEDS_ALIGNED_ACCESS) 282 283#define COMPARE_3CHARS(address, char1, char2, char3) \\ 284 (COMPARE_2CHARS(address, char1, char2) && ((address)[2] == (char3))) 285#define COMPARE_3UCHARS(address, char1, char2, char3) \\ 286 (COMPARE_2UCHARS(address, char1, char2) && ((address)[2] == (char3))) 287#define COMPARE_5CHARS(address, char1, char2, char3, char4, char5) \\ 288 (COMPARE_4CHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) 289#define COMPARE_5UCHARS(address, char1, char2, char3, char4, char5) \\ 290 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && ((address)[4] == (char5))) 291#define COMPARE_6CHARS(address, char1, char2, char3, char4, char5, char6) \\ 292 (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_2CHARS(address + 4, char5, char6)) 293#define COMPARE_6UCHARS(address, char1, char2, char3, char4, char5, char6) \\ 294 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_2UCHARS(address + 4, char5, char6)) 295#define COMPARE_7CHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ 296 (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 3, char4, char5, char6, char7)) 297#define COMPARE_7UCHARS(address, char1, char2, char3, char4, char5, char6, char7) \\ 298 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 3, char4, char5, char6, char7)) 299#define COMPARE_8CHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ 300 (COMPARE_4CHARS(address, char1, char2, char3, char4) && COMPARE_4CHARS(address + 4, char5, char6, char7, char8)) 301#define COMPARE_8UCHARS(address, char1, char2, char3, char4, char5, char6, char7, char8) \\ 302 (COMPARE_4UCHARS(address, char1, char2, char3, char4) && COMPARE_4UCHARS(address + 4, char5, char6, char7, char8)) 303""") 304 305trie.printAsC() 306 307# Close the redirected file if requested 308if (redirect_to_file): 309 file_output.close() 310 sys.stdout = sys.__stdout__ 311