1# udis86 - scripts/ud_opcode.py
2#
3# Copyright (c) 2009 Vivek Thampi
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without modification,
7# are permitted provided that the following conditions are met:
8#
9#     * Redistributions of source code must retain the above copyright notice,
10#       this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above copyright notice,
12#       this list of conditions and the following disclaimer in the documentation
13#       and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
class UdOpcodeTables:
    """Builds a tree of opcode tables from instruction definitions.

    Instructions are registered with addInsnDef(). Each one is filed under
    OpcodeTable0 in a tree of nested nodes. Every node is a dict with the
    keys 'type', 'entries' (children keyed by a two-character index
    string) and 'meta'; leaf nodes have type 'insn' and additionally carry
    'mnemonic', 'prefixes' and 'operands'.

    NOTE: OpcodeTable0, InsnTable, MnemonicsTable and ThreeDNowTable are
    class attributes, so they are shared by all instances of this class.
    """

    # Per table type: the table's name and its size.
    TableInfo = {
        'opctbl'    : { 'name' : 'UD_TAB__OPC_TABLE',   'size' : 256 },
        '/sse'      : { 'name' : 'UD_TAB__OPC_SSE',     'size' : 4 },
        '/reg'      : { 'name' : 'UD_TAB__OPC_REG',     'size' : 8 },
        '/rm'       : { 'name' : 'UD_TAB__OPC_RM',      'size' : 8 },
        '/mod'      : { 'name' : 'UD_TAB__OPC_MOD',     'size' : 2 },
        '/m'        : { 'name' : 'UD_TAB__OPC_MODE',    'size' : 3 },
        '/x87'      : { 'name' : 'UD_TAB__OPC_X87',     'size' : 64 },
        '/a'        : { 'name' : 'UD_TAB__OPC_ASIZE',   'size' : 3 },
        '/o'        : { 'name' : 'UD_TAB__OPC_OSIZE',   'size' : 3 },
        '/3dnow'    : { 'name' : 'UD_TAB__OPC_3DNOW',   'size' : 256 },
        'vendor'    : { 'name' : 'UD_TAB__OPC_VENDOR',  'size' : 3 },
    }

    # Root of the opcode tree; parse() hangs every instruction below it.
    OpcodeTable0 = {
        'type'      : 'opctbl',
        'entries'   : {},
        'meta'      : 'table0'
    }

    # Symbolic extension values mapped to their two-digit table index.
    OpcExtIndex = {

        # ssef2, ssef3, sse66
        'sse': {
            'none' : '00',
            'f2'   : '01',
            'f3'   : '02',
            '66'   : '03'
        },

        # /mod=
        'mod': {
            '!11'   : '00',
            '11'    : '01'
        },

        # /m=, /o=, /a=
        'mode': {
            '16'    : '00',
            '32'    : '01',
            '64'    : '02'
        },

        'vendor' : {
            'amd'   : '00',
            'intel' : '01',
            'any'   : '02'
        }
    }

    # Linear list of instruction forms, one dict per parsed instruction.
    InsnTable = []
    # Distinct mnemonics in order of first appearance.
    MnemonicsTable = []

    ThreeDNowTable = {}

    def sizeOfTable( self, t ):
        """Return the 'size' recorded in TableInfo for table type t."""
        return self.TableInfo[ t ][ 'size' ]

    def nameOfTable( self, t ):
        """Return the 'name' recorded in TableInfo for table type t."""
        return self.TableInfo[ t ][ 'name' ]

    def updateTable( self, table, index, type, meta ):
        """Return the child of table at index, creating it if missing.

        A missing entry is created with the given type and meta and an
        empty 'entries' dict. An existing entry is returned unchanged, but
        only if its type matches the requested one.

        Raises:
            NameError: the existing entry has a different type, i.e. two
                instruction definitions disagree about this slot.
        """
        entries = table[ 'entries' ]
        if index not in entries:
            entries[ index ] = { 'type' : type, 'entries' : {}, 'meta' : meta }
        if entries[ index ][ 'type' ] != type:
            raise NameError( "error: violation in opcode mapping (overwrite) %s with %s." %
                                ( entries[ index ][ 'type' ], type) )
        return entries[ index ]

    class Insn:
        """An abstract type representing an instruction in the opcode map.

        Attributes set by the constructor:
            opcodes   -- private copy of the opcode list, with a leading
                         ssef2/ssef3/sse66 element removed
            nByteInsn -- 1, 2 (leading '0f') or 3 (leading '0f 38'/'0f 3a')
            opcode    -- the opcode element at position nByteInsn - 1
            opcext    -- extension name ('/reg', '/sse', ...) mapped to its
                         two-character table index
        """

        # A mapping of opcode extensions to their representational
        # values used in the opcode map.
        OpcExtMap = {
            '/rm'    : lambda v: "%02x" % int(v, 16),
            '/x87'   : lambda v: "%02x" % int(v, 16),
            '/3dnow' : lambda v: "%02x" % int(v, 16),
            '/reg'   : lambda v: "%02x" % int(v, 16),
            # modrm.mod
            # (!11, 11)    => (00, 01)
            '/mod'   : lambda v: '00' if v == '!11' else '01',
            # Mode extensions:
            # (16, 32, 64) => (00, 01, 02)
            # Floor division is required: with true division (Python 3)
            # 16 / 32 is 0.5, which the %x conversion rejects.
            '/o'     : lambda v: "%02x" % (int(v) // 32),
            '/a'     : lambda v: "%02x" % (int(v) // 32),
            '/m'     : lambda v: "%02x" % (int(v) // 32),
            '/sse'   : lambda v: UdOpcodeTables.OpcExtIndex['sse'][v]
        }

        def __init__(self, prefixes, mnemonic, opcodes, operands, vendor):
            # Work on a copy: the sse prefix is popped below and the
            # caller's list must not be modified behind its back.
            self.opcodes  = list(opcodes)
            self.prefixes = prefixes
            self.mnemonic = mnemonic
            self.operands = operands
            self.vendor   = vendor
            self.opcext   = {}

            ssePrefix = None
            if self.opcodes[0] in ('ssef2', 'ssef3', 'sse66'):
                # 'ssef2' -> 'f2', 'ssef3' -> 'f3', 'sse66' -> '66'
                ssePrefix = self.opcodes[0][3:]
                self.opcodes.pop(0)

            # do some preliminary decoding of the instruction type
            # 1byte, 2byte or 3byte instruction?
            self.nByteInsn = 1
            if self.opcodes[0] == '0f': # 2byte
                # 2+ byte opcodes are always disambiguated by an
                # sse prefix, unless it is a 3d now instruction
                # which is 0f 0f ...
                if self.opcodes[1] != '0f' and ssePrefix is None:
                    ssePrefix = 'none'
                if self.opcodes[1] in ('38', '3a'): # 3byte
                    self.nByteInsn = 3
                else:
                    self.nByteInsn = 2

            # The opcode that indexes into the opcode table.
            self.opcode = self.opcodes[self.nByteInsn - 1]

            # Record opcode extensions; each remaining element has the
            # form '<ext>=<value>', e.g. '/reg=3'.
            for opcode in self.opcodes[self.nByteInsn:]:
                arg, val = opcode.split('=')
                self.opcext[arg] = self.OpcExtMap[arg](val)

            # Record sse extension: the reason sse extension is handled
            # separately is that historically sse was handled as a first
            # class opcode, not as an extension. Now that sse is handled
            # as an extension, we do the manual conversion here, as opposed
            # to modifying the opcode xml file.
            if ssePrefix is not None:
                self.opcext['/sse'] = self.OpcExtMap['/sse'](ssePrefix)

    def parse(self, table, insn):
        """File insn in the tree rooted at table and record it.

        Walks (creating levels as needed) through the opcode bytes, then
        the opcode extensions, then the optional vendor level, and fills
        in the leaf node. The instruction is also appended to InsnTable
        and its mnemonic added to MnemonicsTable if not already present.

        Raises:
            NameError: propagated from updateTable() when the instruction
                conflicts with an entry already in the tree.
        """
        index = insn.opcodes[0]
        if insn.nByteInsn > 1:
            assert index == '0f'
            table = self.updateTable(table, index, 'opctbl', '0f')
            index = insn.opcodes[1]

            if insn.nByteInsn == 3:
                table = self.updateTable(table, index, 'opctbl', index)
                index = insn.opcodes[2]

        # Walk down the tree, create levels as needed, for opcode
        # extensions. The order is important, and determines how
        # well the opcode table is packed. Also note, /sse must come
        # before /o, because /sse may consume the operand size prefix
        # and thereby affect the outcome of /o.
        for ext in ('/mod', '/x87', '/reg', '/rm', '/sse',
                    '/o',   '/a',   '/m',   '/3dnow'):
            if ext in insn.opcext:
                table = self.updateTable(table, index, ext, ext)
                index = insn.opcext[ext]

        # additional table for disambiguating vendor
        # (an empty or missing vendor adds no level)
        if insn.vendor:
            table = self.updateTable(table, index, 'vendor', insn.vendor)
            index = self.OpcExtIndex['vendor'][insn.vendor]

        # make leaf node entries
        leaf = self.updateTable(table, index, 'insn', '')

        leaf['mnemonic'] = insn.mnemonic
        leaf['prefixes'] = insn.prefixes
        leaf['operands'] = insn.operands

        # add instruction to linear table of instruction forms
        self.InsnTable.append({ 'prefixes' : insn.prefixes,
                                'mnemonic' : insn.mnemonic,
                                'operands' : insn.operands })

        # add mnemonic to mnemonic table
        if insn.mnemonic not in self.MnemonicsTable:
            self.MnemonicsTable.append(insn.mnemonic)


    def addInsnDef( self, prefixes, mnemonic, opcodes, operands, vendor ):
        """Add an instruction definition to the opcode tables.

        Builds an Insn from the arguments and files it under OpcodeTable0.
        The opcodes list passed in is left unmodified.
        """
        insn = self.Insn(prefixes=prefixes,
                    mnemonic=mnemonic,
                    opcodes=opcodes,
                    operands=operands,
                    vendor=vendor)
        self.parse(self.OpcodeTable0, insn)

    def print_table( self, table, pfxs ):
        """Print table and, recursively, its sub-tables to stdout.

        pfxs is the indentation prefix put in front of every line; each
        nesting level extends it by '   |'. Entries are listed in sorted
        index order.
        """
        # Single-argument print(...) behaves identically as a Python 2
        # statement and as a Python 3 function call.
        print("%s   |" % pfxs)
        entries = table[ 'entries' ]
        for idx in sorted( entries ):
            e = entries[ idx ]
            if e[ 'type' ] == 'insn':
                print("%s   |-<%s> %s %s" %
                      ( pfxs, idx, e[ 'mnemonic' ], ' '.join( e[ 'operands' ] ) ))
            else:
                print("%s   |-<%s> %s" % ( pfxs, idx, e[ 'type' ] ))
                self.print_table( e, pfxs + '   |' )

    def print_tree( self ):
        """Print the whole opcode tree, starting at OpcodeTable0."""
        self.print_table( self.OpcodeTable0, '' )
236