# udis86 - scripts/itab.py
#
# Copyright (c) 2009 Vivek Thampi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice,
#       this list of conditions and the following disclaimer in the documentation
#       and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from optparse import OptionParser
import os
import sys

# The project-local modules imported below are looked up relative to the
# current working directory, so the path must be extended before importing.
sys.path.append( '../scripts' )

import ud_optable
import ud_opcode

class UdItabGenerator( ud_opcode.UdOpcodeTables ):
    """Writes udis86_itab.c / udis86_itab.h from the opcode tables.

    The opcode tables themselves (OpcodeTable0, TableInfo, MnemonicsTable,
    sizeOfTable, nameOfTable, addInsnDef) are not defined in this class;
    they are expected to come from ud_opcode.UdOpcodeTables.
    """

    #
    # operand dictionary: operand name -> [ operand type, operand size ]
    #
    OperandDict = {
        "Ap"       : [ "OP_A"        , "SZ_P"     ],
        "E"        : [ "OP_E"        , "SZ_NA"    ],
        "Eb"       : [ "OP_E"        , "SZ_B"     ],
        "Ew"       : [ "OP_E"        , "SZ_W"     ],
        "Ev"       : [ "OP_E"        , "SZ_V"     ],
        "Ed"       : [ "OP_E"        , "SZ_D"     ],
        "Eq"       : [ "OP_E"        , "SZ_Q"     ],
        "Ez"       : [ "OP_E"        , "SZ_Z"     ],
        "Ex"       : [ "OP_E"        , "SZ_MDQ"   ],
        "Ep"       : [ "OP_E"        , "SZ_P"     ],
        "G"        : [ "OP_G"        , "SZ_NA"    ],
        "Gb"       : [ "OP_G"        , "SZ_B"     ],
        "Gw"       : [ "OP_G"        , "SZ_W"     ],
        "Gv"       : [ "OP_G"        , "SZ_V"     ],
        "Gy"       : [ "OP_G"        , "SZ_MDQ"   ],
        "Gd"       : [ "OP_G"        , "SZ_D"     ],
        "Gq"       : [ "OP_G"        , "SZ_Q"     ],
        "Gx"       : [ "OP_G"        , "SZ_MDQ"   ],
        "Gz"       : [ "OP_G"        , "SZ_Z"     ],
        "M"        : [ "OP_M"        , "SZ_NA"    ],
        "Mb"       : [ "OP_M"        , "SZ_B"     ],
        "Mw"       : [ "OP_M"        , "SZ_W"     ],
        "Ms"       : [ "OP_M"        , "SZ_W"     ],
        "Md"       : [ "OP_M"        , "SZ_D"     ],
        "Mq"       : [ "OP_M"        , "SZ_Q"     ],
        "Mt"       : [ "OP_M"        , "SZ_T"     ],
        "Mo"       : [ "OP_M"        , "SZ_O"     ],
        "MwRv"     : [ "OP_MR"       , "SZ_WV"    ],
        "MdRy"     : [ "OP_MR"       , "SZ_DY"    ],
        "MbRv"     : [ "OP_MR"       , "SZ_BV"    ],
        "I1"       : [ "OP_I1"       , "SZ_NA"    ],
        "I3"       : [ "OP_I3"       , "SZ_NA"    ],
        "Ib"       : [ "OP_I"        , "SZ_B"     ],
        "Isb"      : [ "OP_I"        , "SZ_SB"    ],
        "Iw"       : [ "OP_I"        , "SZ_W"     ],
        "Iv"       : [ "OP_I"        , "SZ_V"     ],
        "Iz"       : [ "OP_I"        , "SZ_Z"     ],
        "Jv"       : [ "OP_J"        , "SZ_V"     ],
        "Jz"       : [ "OP_J"        , "SZ_Z"     ],
        "Jb"       : [ "OP_J"        , "SZ_B"     ],
        "R"        : [ "OP_R"        , "SZ_RDQ"   ],
        "C"        : [ "OP_C"        , "SZ_NA"    ],
        "D"        : [ "OP_D"        , "SZ_NA"    ],
        "S"        : [ "OP_S"        , "SZ_NA"    ],
        "Ob"       : [ "OP_O"        , "SZ_B"     ],
        "Ow"       : [ "OP_O"        , "SZ_W"     ],
        "Ov"       : [ "OP_O"        , "SZ_V"     ],
        "V"        : [ "OP_V"        , "SZ_O"     ],
        "W"        : [ "OP_W"        , "SZ_O"     ],
        "Wsd"      : [ "OP_W"        , "SZ_O"     ],
        "Wss"      : [ "OP_W"        , "SZ_O"     ],
        "P"        : [ "OP_P"        , "SZ_Q"     ],
        "Q"        : [ "OP_Q"        , "SZ_Q"     ],
        "VR"       : [ "OP_VR"       , "SZ_O"     ],
        "PR"       : [ "OP_PR"       , "SZ_Q"     ],
        "AL"       : [ "OP_AL"       , "SZ_NA"    ],
        "CL"       : [ "OP_CL"       , "SZ_NA"    ],
        "DL"       : [ "OP_DL"       , "SZ_NA"    ],
        "BL"       : [ "OP_BL"       , "SZ_NA"    ],
        "AH"       : [ "OP_AH"       , "SZ_NA"    ],
        "CH"       : [ "OP_CH"       , "SZ_NA"    ],
        "DH"       : [ "OP_DH"       , "SZ_NA"    ],
        "BH"       : [ "OP_BH"       , "SZ_NA"    ],
        "AX"       : [ "OP_AX"       , "SZ_NA"    ],
        "CX"       : [ "OP_CX"       , "SZ_NA"    ],
        "DX"       : [ "OP_DX"       , "SZ_NA"    ],
        "BX"       : [ "OP_BX"       , "SZ_NA"    ],
        "SI"       : [ "OP_SI"       , "SZ_NA"    ],
        "DI"       : [ "OP_DI"       , "SZ_NA"    ],
        "SP"       : [ "OP_SP"       , "SZ_NA"    ],
        "BP"       : [ "OP_BP"       , "SZ_NA"    ],
        "eAX"      : [ "OP_eAX"      , "SZ_NA"    ],
        "eCX"      : [ "OP_eCX"      , "SZ_NA"    ],
        "eDX"      : [ "OP_eDX"      , "SZ_NA"    ],
        "eBX"      : [ "OP_eBX"      , "SZ_NA"    ],
        "eSI"      : [ "OP_eSI"      , "SZ_NA"    ],
        "eDI"      : [ "OP_eDI"      , "SZ_NA"    ],
        "eSP"      : [ "OP_eSP"      , "SZ_NA"    ],
        "eBP"      : [ "OP_eBP"      , "SZ_NA"    ],
        "rAX"      : [ "OP_rAX"      , "SZ_NA"    ],
        "rCX"      : [ "OP_rCX"      , "SZ_NA"    ],
        "rBX"      : [ "OP_rBX"      , "SZ_NA"    ],
        "rDX"      : [ "OP_rDX"      , "SZ_NA"    ],
        "rSI"      : [ "OP_rSI"      , "SZ_NA"    ],
        "rDI"      : [ "OP_rDI"      , "SZ_NA"    ],
        "rSP"      : [ "OP_rSP"      , "SZ_NA"    ],
        "rBP"      : [ "OP_rBP"      , "SZ_NA"    ],
        "ES"       : [ "OP_ES"       , "SZ_NA"    ],
        "CS"       : [ "OP_CS"       , "SZ_NA"    ],
        "DS"       : [ "OP_DS"       , "SZ_NA"    ],
        "SS"       : [ "OP_SS"       , "SZ_NA"    ],
        "GS"       : [ "OP_GS"       , "SZ_NA"    ],
        "FS"       : [ "OP_FS"       , "SZ_NA"    ],
        "ST0"      : [ "OP_ST0"      , "SZ_NA"    ],
        "ST1"      : [ "OP_ST1"      , "SZ_NA"    ],
        "ST2"      : [ "OP_ST2"      , "SZ_NA"    ],
        "ST3"      : [ "OP_ST3"      , "SZ_NA"    ],
        "ST4"      : [ "OP_ST4"      , "SZ_NA"    ],
        "ST5"      : [ "OP_ST5"      , "SZ_NA"    ],
        "ST6"      : [ "OP_ST6"      , "SZ_NA"    ],
        "ST7"      : [ "OP_ST7"      , "SZ_NA"    ],
        "NONE"     : [ "OP_NONE"     , "SZ_NA"    ],
        "ALr8b"    : [ "OP_ALr8b"    , "SZ_NA"    ],
        "CLr9b"    : [ "OP_CLr9b"    , "SZ_NA"    ],
        "DLr10b"   : [ "OP_DLr10b"   , "SZ_NA"    ],
        "BLr11b"   : [ "OP_BLr11b"   , "SZ_NA"    ],
        "AHr12b"   : [ "OP_AHr12b"   , "SZ_NA"    ],
        "CHr13b"   : [ "OP_CHr13b"   , "SZ_NA"    ],
        "DHr14b"   : [ "OP_DHr14b"   , "SZ_NA"    ],
        "BHr15b"   : [ "OP_BHr15b"   , "SZ_NA"    ],
        "rAXr8"    : [ "OP_rAXr8"    , "SZ_NA"    ],
        "rCXr9"    : [ "OP_rCXr9"    , "SZ_NA"    ],
        "rDXr10"   : [ "OP_rDXr10"   , "SZ_NA"    ],
        "rBXr11"   : [ "OP_rBXr11"   , "SZ_NA"    ],
        "rSPr12"   : [ "OP_rSPr12"   , "SZ_NA"    ],
        "rBPr13"   : [ "OP_rBPr13"   , "SZ_NA"    ],
        "rSIr14"   : [ "OP_rSIr14"   , "SZ_NA"    ],
        "rDIr15"   : [ "OP_rDIr15"   , "SZ_NA"    ],
        "jWP"      : [ "OP_J"        , "SZ_WP"    ],
        "jDP"      : [ "OP_J"        , "SZ_DP"    ],

    }

    #
    # opcode prefix dictionary
    #
    PrefixDict = {
        "aso"      : "P_aso",
        "oso"      : "P_oso",
        "rexw"     : "P_rexw",
        "rexb"     : "P_rexb",
        "rexx"     : "P_rexx",
        "rexr"     : "P_rexr",
        "seg"      : "P_seg",
        "inv64"    : "P_inv64",
        "def64"    : "P_def64",
        "depM"     : "P_depM",
        "cast1"    : "P_c1",
        "cast2"    : "P_c2",
        "cast3"    : "P_c3",
        "cast"     : "P_cast",
        "sext"     : "P_sext"
    }

    # Placeholder used for every table slot that has no definition; it
    # always occupies index 0 of the instruction table.
    InvalidEntryIdx = 0
    InvalidEntry = { 'type'     : 'invalid',
                     'mnemonic' : 'invalid',
                     'operands' : '',
                     'prefixes' : '',
                     'meta'     : '' }

    # Class-level defaults, kept for backward compatibility.  The mutable
    # ones are replaced by per-instance objects in __init__ so that two
    # generators never append to the same list/dict.
    Itab     = []   # instruction table
    ItabIdx  = 1    # instruction table index
    GtabIdx  = 0    # group table index
    GtabMeta = []

    ItabLookup = {}

    MnemonicAliases = ( "invalid", "3dnow", "none", "db", "pause" )

    def __init__( self, outputDir ):
        """Set up an empty generator that writes its files into outputDir."""
        # Per-instance state.  The first itab entry (0) is Invalid.
        self.Itab       = [ self.InvalidEntry ]
        self.ItabIdx    = 1
        self.GtabIdx    = 0
        self.GtabMeta   = []
        self.ItabLookup = {}
        # MnemonicsTable is not defined in this class; presumably it is
        # provided by ud_opcode.UdOpcodeTables -- verify there.
        self.MnemonicsTable.extend( self.MnemonicAliases )
        self.outputDir = outputDir

    def toGroupId( self, id ):
        """Return id with bit 0x8000 set, marking it as a table reference."""
        return 0x8000 | id

    def genLookupTable( self, table, scope = '' ):
        """Emit the C array for table (and, recursively, its sub-tables).

        Instruction leaves are appended to self.Itab and referenced by
        their index; sub-tables are referenced by their group id; every
        other slot points at the invalid entry.  scope, when non-empty,
        is written in front of the C declaration (e.g. 'static').

        Requires self.ItabC to be an open, writable file.
        Returns the group id (see toGroupId) assigned to table.
        """
        tabIdx = self.GtabIdx
        self.GtabIdx = tabIdx + 1
        self.GtabMeta.append( { 'type' : table[ 'type' ], 'meta' : table[ 'meta' ] } )

        entries  = table[ 'entries' ]
        idxArray = []
        for slot in range( self.sizeOfTable( table[ 'type' ] ) ):
            key = "%02x" % slot

            e = self.InvalidEntry
            i = self.InvalidEntryIdx
            if key in entries:
                e = entries[ key ]

            if e[ 'type' ] == 'insn':
                # leaf node (insn): takes the next free itab index
                i = self.ItabIdx
                self.ItabIdx = i + 1
                self.Itab.append( e )
            elif e[ 'type' ] != 'invalid':
                # nested table: emitted first, so it precedes its parent
                # in the generated C file
                i = self.genLookupTable( e, 'static' )

            idxArray.append( i )

        name = "ud_itab__%s" % tabIdx
        self.ItabLookup[ tabIdx ] = name

        out = self.ItabC.write
        out( "\n" )
        if scope:
            out( scope + ' ' )
        out( "const uint16_t %s[] = {\n" % name )
        for pos, value in enumerate( idxArray ):
            # four entries per output row, each row labelled with the
            # index of its first entry
            if pos % 4 == 0:
                if pos > 0:
                    out( "\n" )
                out( " /* %2x */" % pos )
            if value >= 0x8000:
                # strip the marker bit again; the GROUP() macro re-adds it
                out( "%12s," % ( "GROUP(%d)" % ( value & ~0x8000 ) ) )
            else:
                out( "%12d," % value )
        out( "\n" )
        out( "};\n" )

        return self.toGroupId( tabIdx )

    def genLookupTableList( self ):
        """Emit ud_lookup_table_list[], one row per generated lookup table.

        Requires self.ItabC to be an open, writable file and
        genLookupTable to have populated GtabMeta / ItabLookup.
        """
        out = self.ItabC.write
        out( "\n\n" )
        out( "struct ud_lookup_table_list_entry ud_lookup_table_list[] = {\n" )
        for i, meta in enumerate( self.GtabMeta ):
            f0 = self.ItabLookup[ i ] + ","
            f1 = self.nameOfTable( meta[ 'type' ] ) + ","
            f2 = "\"%s\"" % meta[ 'meta' ]
            out( " /* %03d */ { %s %s %s },\n" % ( i, f0, f1, f2 ) )
        out( "};" )

    def genInsnTable( self ):
        """Emit ud_itab[] (one row per self.Itab entry) and ud_mnemonics_str[].

        Unknown operands are reported on stdout and still emitted as
        "O_<name>".  Unknown prefixes are reported on stdout and then
        raise KeyError from the PrefixDict lookup.

        Requires self.ItabC to be an open, writable file.
        """
        out = self.ItabC.write
        out( "struct ud_itab_entry ud_itab[] = {\n" )
        for idx, e in enumerate( self.Itab ):
            opr_c = [ "O_NONE", "O_NONE", "O_NONE" ]
            for i, operand in enumerate( e[ 'operands' ] ):
                if operand not in self.OperandDict:
                    print("error: invalid operand declaration: %s\n" % operand)
                opr_c[ i ] = "O_" + operand
            opr = "%s %s %s" % ( opr_c[ 0 ] + ",", opr_c[ 1 ] + ",", opr_c[ 2 ] )

            pfx_c = []
            for p in e[ 'prefixes' ]:
                if p not in self.PrefixDict:
                    # report the offending prefix itself (the previous code
                    # formatted an unrelated, possibly unbound, variable)
                    print("error: invalid prefix specification: %s \n" % p)
                pfx_c.append( self.PrefixDict[ p ] )
            if len( e[ 'prefixes' ] ) == 0:
                pfx_c.append( "P_none" )
            pfx = "|".join( pfx_c )

            out( " /* %04d */ { UD_I%s %s, %s },\n"
                 % ( idx, e[ 'mnemonic' ] + ',', opr, pfx ) )
        out( "};\n" )

        out( "\n\n" )
        out( "const char * ud_mnemonics_str[] = {\n" )
        out( ",\n ".join( [ "\"%s\"" % m for m in self.MnemonicsTable ] ) )
        out( "\n};\n" )

    def genItabH( self ):
        """Write udis86_itab.h into self.outputDir.

        The header contains the table-type enum, the mnemonic enum, one
        O_* macro per OperandDict entry and the GROUP() macro.
        """
        self.ItabH = open( os.path.join( self.outputDir, "udis86_itab.h" ), "w" )
        try:
            out = self.ItabH.write

            out( "#ifndef UD_ITAB_H\n" )
            out( "#define UD_ITAB_H\n\n" )

            # table type enumeration
            out( "/* ud_table_type -- lookup table types (see lookup.c) */\n" )
            out( "enum ud_table_type {\n " )
            enum = [ self.TableInfo[ k ][ 'name' ] for k in self.TableInfo.keys() ]
            out( ",\n ".join( enum ) )
            out( "\n};\n\n" )

            # mnemonic enumeration
            out( "/* ud_mnemonic -- mnemonic constants */\n" )
            enum  = "enum ud_mnemonic_code {\n "
            enum += ",\n ".join( [ "UD_I%s" % m for m in self.MnemonicsTable ] )
            enum += "\n} UD_ATTR_PACKED;\n"
            out( enum )
            out( "\n" )

            out( "\n/* itab entry operand definitions */\n" )
            # sorted() rather than keys().sort(): dict views have no
            # sort() method on Python 3
            for o in sorted( self.OperandDict ):
                out( "#define O_%-7s { %-12s %-8s }\n" %
                     ( o, self.OperandDict[ o ][ 0 ] + ",", self.OperandDict[ o ][ 1 ] ) )
            out( "\n\n" )

            out( "extern const char * ud_mnemonics_str[];\n" )

            out( "#define GROUP(n) (0x8000 | (n))" )

            out( "\n#endif /* UD_ITAB_H */\n" )
        finally:
            # close the file even if generation fails part-way
            self.ItabH.close()

    def genItabC( self ):
        """Write udis86_itab.c into self.outputDir.

        Emits the lookup tables reachable from self.OpcodeTable0, then
        the lookup table list, then the instruction and mnemonic tables.
        """
        self.ItabC = open( os.path.join( self.outputDir, "udis86_itab.c" ), "w" )
        try:
            self.ItabC.write( "/* itab.c -- generated by itab.py, do no edit" )
            self.ItabC.write( " */\n" )
            self.ItabC.write( "#include \"udis86_decode.h\"\n\n" )

            # order matters: the lookup tables fill Itab / GtabMeta /
            # ItabLookup, which the two following steps read
            self.genLookupTable( self.OpcodeTable0 )
            self.genLookupTableList()
            self.genInsnTable()
        finally:
            # close the file even if generation fails part-way
            self.ItabC.close()

    def genItab( self ):
        """Generate both output files (the .c file first, then the .h file)."""
        self.genItabC()
        self.genItabH()

def main():
    """Command-line entry point: itab.py [--outputDir DIR] OPTABLE_XML"""
    parser = OptionParser()
    parser.add_option("--outputDir", dest="outputDir", default="")
    options, args = parser.parse_args()
    if not args:
        # a usage error is clearer than an IndexError on args[ 0 ]
        parser.error( "missing optable XML file argument" )
    generator = UdItabGenerator(os.path.normpath(options.outputDir))
    optableXmlParser = ud_optable.UdOptableXmlParser()
    optableXmlParser.parse( args[ 0 ], generator.addInsnDef )

    generator.genItab()

if __name__ == '__main__':
    main()