# udis86 - scripts/itab.py
#
# Copyright (c) 2009 Vivek Thampi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice,
#       this list of conditions and the following disclaimer in the documentation
#       and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
from optparse import OptionParser
import os
import sys

# Must run before the project-local imports below so they can be resolved.
sys.path.append( '../scripts' )

import ud_optable
import ud_opcode
34
class UdItabGenerator( ud_opcode.UdOpcodeTables ):
    """Generate udis86_itab.c and udis86_itab.h from the opcode tables.

    The generator walks the opcode table tree rooted at ``self.OpcodeTable0``
    and emits, into ``outputDir``:

      * udis86_itab.c -- one ``uint16_t`` lookup array per table, the list
        of all lookup tables, the instruction table and the mnemonic strings.
      * udis86_itab.h -- the table-type and mnemonic enumerations plus the
        ``O_*`` operand macros and the ``GROUP()`` macro.

    ``OpcodeTable0``, ``TableInfo``, ``MnemonicsTable``, ``sizeOfTable`` and
    ``nameOfTable`` are not defined here; presumably they are inherited from
    ``ud_opcode.UdOpcodeTables`` -- verify against that module.
    """

    #
    # operand dictionary: operand spec -> [ operand type, operand size ].
    # Each entry is emitted as `#define O_<spec> { <type>, <size> }` in
    # udis86_itab.h; instruction table rows refer to operands as O_<spec>.
    #
    OperandDict = {
        "Ap"       : [    "OP_A"        , "SZ_P"     ],
        "E"        : [    "OP_E"        , "SZ_NA"    ],
        "Eb"       : [    "OP_E"        , "SZ_B"     ],
        "Ew"       : [    "OP_E"        , "SZ_W"     ],
        "Ev"       : [    "OP_E"        , "SZ_V"     ],
        "Ed"       : [    "OP_E"        , "SZ_D"     ],
        "Eq"       : [    "OP_E"        , "SZ_Q"     ],
        "Ez"       : [    "OP_E"        , "SZ_Z"     ],
        "Ex"       : [    "OP_E"        , "SZ_MDQ"   ],
        "Ep"       : [    "OP_E"        , "SZ_P"     ],
        "G"        : [    "OP_G"        , "SZ_NA"    ],
        "Gb"       : [    "OP_G"        , "SZ_B"     ],
        "Gw"       : [    "OP_G"        , "SZ_W"     ],
        "Gv"       : [    "OP_G"        , "SZ_V"     ],
        "Gy"       : [    "OP_G"        , "SZ_MDQ"   ],
        "Gd"       : [    "OP_G"        , "SZ_D"     ],
        "Gq"       : [    "OP_G"        , "SZ_Q"     ],
        "Gx"       : [    "OP_G"        , "SZ_MDQ"   ],
        "Gz"       : [    "OP_G"        , "SZ_Z"     ],
        "M"        : [    "OP_M"        , "SZ_NA"    ],
        "Mb"       : [    "OP_M"        , "SZ_B"     ],
        "Mw"       : [    "OP_M"        , "SZ_W"     ],
        "Ms"       : [    "OP_M"        , "SZ_W"     ],
        "Md"       : [    "OP_M"        , "SZ_D"     ],
        "Mq"       : [    "OP_M"        , "SZ_Q"     ],
        "Mt"       : [    "OP_M"        , "SZ_T"     ],
        "Mo"       : [    "OP_M"        , "SZ_O"     ],
        "MwRv"     : [    "OP_MR"       , "SZ_WV"    ],
        "MdRy"     : [    "OP_MR"       , "SZ_DY"    ],
        "MbRv"     : [    "OP_MR"       , "SZ_BV"    ],
        "I1"       : [    "OP_I1"       , "SZ_NA"    ],
        "I3"       : [    "OP_I3"       , "SZ_NA"    ],
        "Ib"       : [    "OP_I"        , "SZ_B"     ],
        "Isb"      : [    "OP_I"        , "SZ_SB"    ],
        "Iw"       : [    "OP_I"        , "SZ_W"     ],
        "Iv"       : [    "OP_I"        , "SZ_V"     ],
        "Iz"       : [    "OP_I"        , "SZ_Z"     ],
        "Jv"       : [    "OP_J"        , "SZ_V"     ],
        "Jz"       : [    "OP_J"        , "SZ_Z"     ],
        "Jb"       : [    "OP_J"        , "SZ_B"     ],
        "R"        : [    "OP_R"        , "SZ_RDQ"   ],
        "C"        : [    "OP_C"        , "SZ_NA"    ],
        "D"        : [    "OP_D"        , "SZ_NA"    ],
        "S"        : [    "OP_S"        , "SZ_NA"    ],
        "Ob"       : [    "OP_O"        , "SZ_B"     ],
        "Ow"       : [    "OP_O"        , "SZ_W"     ],
        "Ov"       : [    "OP_O"        , "SZ_V"     ],
        "V"        : [    "OP_V"        , "SZ_O"     ],
        "W"        : [    "OP_W"        , "SZ_O"     ],
        "Wsd"      : [    "OP_W"        , "SZ_O"     ],
        "Wss"      : [    "OP_W"        , "SZ_O"     ],
        "P"        : [    "OP_P"        , "SZ_Q"     ],
        "Q"        : [    "OP_Q"        , "SZ_Q"     ],
        "VR"       : [    "OP_VR"       , "SZ_O"     ],
        "PR"       : [    "OP_PR"       , "SZ_Q"     ],
        "AL"       : [    "OP_AL"       , "SZ_NA"    ],
        "CL"       : [    "OP_CL"       , "SZ_NA"    ],
        "DL"       : [    "OP_DL"       , "SZ_NA"    ],
        "BL"       : [    "OP_BL"       , "SZ_NA"    ],
        "AH"       : [    "OP_AH"       , "SZ_NA"    ],
        "CH"       : [    "OP_CH"       , "SZ_NA"    ],
        "DH"       : [    "OP_DH"       , "SZ_NA"    ],
        "BH"       : [    "OP_BH"       , "SZ_NA"    ],
        "AX"       : [    "OP_AX"       , "SZ_NA"    ],
        "CX"       : [    "OP_CX"       , "SZ_NA"    ],
        "DX"       : [    "OP_DX"       , "SZ_NA"    ],
        "BX"       : [    "OP_BX"       , "SZ_NA"    ],
        "SI"       : [    "OP_SI"       , "SZ_NA"    ],
        "DI"       : [    "OP_DI"       , "SZ_NA"    ],
        "SP"       : [    "OP_SP"       , "SZ_NA"    ],
        "BP"       : [    "OP_BP"       , "SZ_NA"    ],
        "eAX"      : [    "OP_eAX"      , "SZ_NA"    ],
        "eCX"      : [    "OP_eCX"      , "SZ_NA"    ],
        "eDX"      : [    "OP_eDX"      , "SZ_NA"    ],
        "eBX"      : [    "OP_eBX"      , "SZ_NA"    ],
        "eSI"      : [    "OP_eSI"      , "SZ_NA"    ],
        "eDI"      : [    "OP_eDI"      , "SZ_NA"    ],
        "eSP"      : [    "OP_eSP"      , "SZ_NA"    ],
        "eBP"      : [    "OP_eBP"      , "SZ_NA"    ],
        "rAX"      : [    "OP_rAX"      , "SZ_NA"    ],
        "rCX"      : [    "OP_rCX"      , "SZ_NA"    ],
        "rBX"      : [    "OP_rBX"      , "SZ_NA"    ],
        "rDX"      : [    "OP_rDX"      , "SZ_NA"    ],
        "rSI"      : [    "OP_rSI"      , "SZ_NA"    ],
        "rDI"      : [    "OP_rDI"      , "SZ_NA"    ],
        "rSP"      : [    "OP_rSP"      , "SZ_NA"    ],
        "rBP"      : [    "OP_rBP"      , "SZ_NA"    ],
        "ES"       : [    "OP_ES"       , "SZ_NA"    ],
        "CS"       : [    "OP_CS"       , "SZ_NA"    ],
        "DS"       : [    "OP_DS"       , "SZ_NA"    ],
        "SS"       : [    "OP_SS"       , "SZ_NA"    ],
        "GS"       : [    "OP_GS"       , "SZ_NA"    ],
        "FS"       : [    "OP_FS"       , "SZ_NA"    ],
        "ST0"      : [    "OP_ST0"      , "SZ_NA"    ],
        "ST1"      : [    "OP_ST1"      , "SZ_NA"    ],
        "ST2"      : [    "OP_ST2"      , "SZ_NA"    ],
        "ST3"      : [    "OP_ST3"      , "SZ_NA"    ],
        "ST4"      : [    "OP_ST4"      , "SZ_NA"    ],
        "ST5"      : [    "OP_ST5"      , "SZ_NA"    ],
        "ST6"      : [    "OP_ST6"      , "SZ_NA"    ],
        "ST7"      : [    "OP_ST7"      , "SZ_NA"    ],
        "NONE"     : [    "OP_NONE"     , "SZ_NA"    ],
        "ALr8b"    : [    "OP_ALr8b"    , "SZ_NA"    ],
        "CLr9b"    : [    "OP_CLr9b"    , "SZ_NA"    ],
        "DLr10b"   : [    "OP_DLr10b"   , "SZ_NA"    ],
        "BLr11b"   : [    "OP_BLr11b"   , "SZ_NA"    ],
        "AHr12b"   : [    "OP_AHr12b"   , "SZ_NA"    ],
        "CHr13b"   : [    "OP_CHr13b"   , "SZ_NA"    ],
        "DHr14b"   : [    "OP_DHr14b"   , "SZ_NA"    ],
        "BHr15b"   : [    "OP_BHr15b"   , "SZ_NA"    ],
        "rAXr8"    : [    "OP_rAXr8"    , "SZ_NA"    ],
        "rCXr9"    : [    "OP_rCXr9"    , "SZ_NA"    ],
        "rDXr10"   : [    "OP_rDXr10"   , "SZ_NA"    ],
        "rBXr11"   : [    "OP_rBXr11"   , "SZ_NA"    ],
        "rSPr12"   : [    "OP_rSPr12"   , "SZ_NA"    ],
        "rBPr13"   : [    "OP_rBPr13"   , "SZ_NA"    ],
        "rSIr14"   : [    "OP_rSIr14"   , "SZ_NA"    ],
        "rDIr15"   : [    "OP_rDIr15"   , "SZ_NA"    ],
        "jWP"      : [    "OP_J"        , "SZ_WP"    ],
        "jDP"      : [    "OP_J"        , "SZ_DP"    ],
    }

    #
    # opcode prefix dictionary: prefix name -> C constant. The constants of
    # one instruction are joined with '|' in its instruction table row.
    #
    PrefixDict = {
        "aso"      : "P_aso",
        "oso"      : "P_oso",
        "rexw"     : "P_rexw",
        "rexb"     : "P_rexb",
        "rexx"     : "P_rexx",
        "rexr"     : "P_rexr",
        "seg"      : "P_seg",
        "inv64"    : "P_inv64",
        "def64"    : "P_def64",
        "depM"     : "P_depM",
        "cast1"    : "P_c1",
        "cast2"    : "P_c2",
        "cast3"    : "P_c3",
        "cast"     : "P_cast",
        "sext"     : "P_sext"
    }

    # Slot 0 of the instruction table is reserved for this placeholder; every
    # lookup slot without a definition refers to it.
    InvalidEntryIdx = 0
    InvalidEntry = { 'type'     : 'invalid',
                     'mnemonic' : 'invalid',
                     'operands' : '',
                     'prefixes' : '',
                     'meta'     : '' }

    # Class-level defaults, kept for backward compatibility. __init__ gives
    # every instance its own containers so generators never share state.
    Itab     = []   # instruction table
    ItabIdx  = 1    # instruction table index
    GtabIdx  = 0    # group table index
    GtabMeta = []

    ItabLookup = {}

    MnemonicAliases = ( "invalid", "3dnow", "none", "db", "pause" )

    def __init__( self, outputDir ):
        """Create a generator that writes its output files into outputDir."""
        # Per-instance state: the containers used to live only on the class,
        # so a second generator inherited the entries appended by the first.
        # first itab entry (0) is Invalid
        self.Itab       = [ self.InvalidEntry ]
        self.ItabIdx    = 1
        self.GtabIdx    = 0
        self.GtabMeta   = []
        self.ItabLookup = {}
        # NOTE(review): MnemonicsTable is not defined in this class, so this
        # extends a list owned elsewhere (presumably the base class) and the
        # aliases are appended once per constructed generator -- confirm.
        self.MnemonicsTable.extend( self.MnemonicAliases )
        self.outputDir = outputDir

    def toGroupId( self, id ):
        """Return id with bit 0x8000 set, marking it as a lookup table reference."""
        return 0x8000 | id

    def genLookupTable( self, table, scope = '' ):
        """Write the C array for table, and for its nested tables, to self.ItabC.

        table -- dict with 'type', 'meta' and 'entries'; 'entries' is keyed
                 by the slot number formatted as two lowercase hex digits.
        scope -- text written in front of the array declaration; nested
                 tables are emitted with 'static'.

        Every slot of the emitted array is either an index into the
        instruction table (InvalidEntryIdx when the slot is undefined) or
        the group id of a nested lookup table. Nested tables are written
        before the table that refers to them.

        Returns the group id of the emitted table.
        """
        # Reserve this table's index before recursing so that parents get
        # lower indices than their children.
        tabIdx = self.GtabIdx
        self.GtabIdx += 1
        self.GtabMeta.append( { 'type' : table[ 'type' ], 'meta' : table[ 'meta' ] } )

        entries = table[ 'entries' ]
        refs = []
        for slot in range( self.sizeOfTable( table[ 'type' ] ) ):
            entry = entries.get( "%02x" % slot, self.InvalidEntry )

            if entry[ 'type' ] == 'insn':
                # leaf node (insn): claim the next instruction table row
                ref = self.ItabIdx
                self.ItabIdx += 1
                self.Itab.append( entry )
            elif entry[ 'type' ] != 'invalid':
                # anything else is a nested lookup table
                ref = self.genLookupTable( entry, 'static' )
            else:
                ref = self.InvalidEntryIdx

            refs.append( ref )

        name = "ud_itab__%s" % tabIdx
        self.ItabLookup[ tabIdx ] = name

        out = self.ItabC
        out.write( "\n" )
        if scope:
            out.write( scope + ' ' )
        out.write( "const uint16_t %s[] = {\n" % name )
        for pos, ref in enumerate( refs ):
            # four entries per row, each row prefixed with its first slot
            if pos % 4 == 0:
                if pos > 0:
                    out.write( "\n" )
                out.write( "  /* %2x */" % pos )
            if ref >= 0x8000:
                # strip the group bit again; the GROUP() macro re-adds it
                out.write( "%12s," % ( "GROUP(%d)" % ( ~0x8000 & ref ) ) )
            else:
                out.write( "%12d," % ref )
        out.write( "\n" )
        out.write( "};\n" )

        return self.toGroupId( tabIdx )

    def genLookupTableList( self ):
        """Write ud_lookup_table_list, one row per emitted lookup table.

        Each row holds the table's C array name, its table type name and its
        'meta' string. Must run after genLookupTable, which fills GtabMeta
        and ItabLookup.
        """
        out = self.ItabC
        out.write( "\n\n" )
        out.write( "struct ud_lookup_table_list_entry ud_lookup_table_list[] = {\n" )
        for i, meta in enumerate( self.GtabMeta ):
            f0 = self.ItabLookup[ i ] + ","
            f1 = self.nameOfTable( meta[ 'type' ] ) + ","
            f2 = "\"%s\"" % meta[ 'meta' ]
            out.write( "    /* %03d */ { %s %s %s },\n" % ( i, f0, f1, f2 ) )
        out.write( "};" )

    def genInsnTable( self ):
        """Write the ud_itab instruction table and the ud_mnemonics_str array.

        Unknown operands are reported on stdout and still emitted as
        O_<operand>. Unknown prefixes are reported on stdout and then raise
        KeyError.
        """
        out = self.ItabC
        out.write( "struct ud_itab_entry ud_itab[] = {\n" )
        for idx, e in enumerate( self.Itab ):
            # unused operand slots stay O_NONE
            opr_c = [ "O_NONE", "O_NONE", "O_NONE" ]
            for i, operand in enumerate( e[ 'operands' ] ):
                if operand not in self.OperandDict:
                    print("error: invalid operand declaration: %s\n" % operand)
                opr_c[ i ] = "O_" + operand
            opr = "%s %s %s" % ( opr_c[ 0 ] + ",", opr_c[ 1 ] + ",", opr_c[ 2 ] )

            pfx_c = []
            for p in e[ 'prefixes' ]:
                if p not in self.PrefixDict:
                    # report the offending prefix itself (the message used to
                    # reference an unrelated, possibly undefined variable)
                    print("error: invalid prefix specification: %s \n" % p)
                # an unknown prefix raises KeyError here
                pfx_c.append( self.PrefixDict[ p ] )
            if len( e[ 'prefixes' ] ) == 0:
                pfx_c.append( "P_none" )
            pfx = "|".join( pfx_c )

            out.write( "  /* %04d */ { UD_I%s %s, %s },\n"
                       % ( idx, e[ 'mnemonic' ] + ',', opr, pfx ) )
        out.write( "};\n" )

        out.write( "\n\n" )
        out.write( "const char * ud_mnemonics_str[] = {\n" )
        out.write( ",\n    ".join( [ "\"%s\"" % m for m in self.MnemonicsTable ] ) )
        out.write( "\n};\n" )

    def genItabH( self ):
        """Write udis86_itab.h into the output directory.

        The header holds the ud_table_type and ud_mnemonic_code enumerations,
        one O_<operand> macro per OperandDict entry (sorted by name), the
        ud_mnemonics_str declaration and the GROUP() macro.
        """
        self.ItabH = open( os.path.join( self.outputDir, "udis86_itab.h" ), "w" )
        try:
            out = self.ItabH

            out.write( "#ifndef UD_ITAB_H\n" )
            out.write( "#define UD_ITAB_H\n\n" )

            # table type enumeration
            out.write( "/* ud_table_type -- lookup table types (see lookup.c) */\n" )
            out.write( "enum ud_table_type {\n    " )
            tableTypes = [ self.TableInfo[ k ][ 'name' ] for k in self.TableInfo ]
            out.write( ",\n    ".join( tableTypes ) )
            out.write( "\n};\n\n" )

            # mnemonic enumeration
            out.write( "/* ud_mnemonic -- mnemonic constants */\n" )
            enum  = "enum ud_mnemonic_code {\n    "
            enum += ",\n    ".join( [ "UD_I%s" % m for m in self.MnemonicsTable ] )
            enum += "\n} UD_ATTR_PACKED;\n"
            out.write( enum )
            out.write( "\n" )

            out.write( "\n/* itab entry operand definitions */\n" )
            # sorted() also works on Python 3, where dict.keys() returns a
            # view object that has no sort() method
            for o in sorted( self.OperandDict ):
                out.write( "#define O_%-7s { %-12s %-8s }\n" %
                           ( o, self.OperandDict[ o ][ 0 ] + ",", self.OperandDict[ o ][ 1 ] ) )
            out.write( "\n\n" )

            out.write( "extern const char * ud_mnemonics_str[];\n" )

            out.write( "#define GROUP(n) (0x8000 | (n))" )

            out.write( "\n#endif /* UD_ITAB_H */\n" )
        finally:
            # close the file even when generation fails half-way
            self.ItabH.close()

    def genItabC( self ):
        """Write udis86_itab.c into the output directory.

        Emits, in this order, the lookup tables reachable from OpcodeTable0,
        the lookup table list, and the instruction table with the mnemonic
        strings.
        """
        self.ItabC = open( os.path.join( self.outputDir, "udis86_itab.c" ), "w" )
        try:
            self.ItabC.write( "/* itab.c -- generated by itab.py, do no edit" )
            self.ItabC.write( " */\n" )
            self.ItabC.write( "#include \"udis86_decode.h\"\n\n" )

            self.genLookupTable( self.OpcodeTable0 )
            self.genLookupTableList()
            self.genInsnTable()
        finally:
            # close the file even when generation fails half-way
            self.ItabC.close()

    def genItab( self ):
        """Generate both output files: udis86_itab.c first, then udis86_itab.h."""
        self.genItabC()
        self.genItabH()
348
def main():
    """Command-line entry point: read the opcode definitions, write the itab files.

    The first positional argument is handed to UdOptableXmlParser.parse as
    the input to read (presumably the path of the opcode table XML file --
    confirm against ud_optable). --outputDir selects the directory that
    receives the generated files; the default empty string normalizes to
    the current directory.
    """
    parser = OptionParser( usage = "usage: %prog [--outputDir DIR] OPTABLE_FILE" )
    parser.add_option( "--outputDir", dest = "outputDir", default = "" )
    options, args = parser.parse_args()
    if not args:
        # Report a usage error instead of failing later with a bare
        # IndexError on args[ 0 ]; parser.error() prints and exits.
        parser.error( "missing input file argument" )

    generator = UdItabGenerator( os.path.normpath( options.outputDir ) )
    optableXmlParser = ud_optable.UdOptableXmlParser()
    # The parser calls generator.addInsnDef back; presumably once per
    # instruction definition -- confirm against ud_optable.
    optableXmlParser.parse( args[ 0 ], generator.addInsnDef )

    generator.genItab()
358
# Run the generator only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
361