# udis86 - scripts/ud_opcode.py
#
# Copyright (c) 2009 Vivek Thampi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice,
#       this list of conditions and the following disclaimer in the documentation
#       and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
class UdOpcodeTables:
    """Builds a tree of opcode tables from instruction definitions.

    Each node of the tree is a dict with the keys 'type', 'entries' and
    'meta'.  'entries' maps an index string (a two-digit hex string) to a
    child node.  Leaf nodes have type 'insn' and additionally carry
    'mnemonic', 'prefixes' and 'operands'.

    NOTE: OpcodeTable0, InsnTable, MnemonicsTable and ThreeDNowTable are
    class attributes that are mutated in place, so they are shared by
    every instance of this class.
    """

    # Name and number of slots of each table type, keyed by table type.
    TableInfo = {
        'opctbl': {'name': 'UD_TAB__OPC_TABLE', 'size': 256},
        '/sse': {'name': 'UD_TAB__OPC_SSE', 'size': 4},
        '/reg': {'name': 'UD_TAB__OPC_REG', 'size': 8},
        '/rm': {'name': 'UD_TAB__OPC_RM', 'size': 8},
        '/mod': {'name': 'UD_TAB__OPC_MOD', 'size': 2},
        '/m': {'name': 'UD_TAB__OPC_MODE', 'size': 3},
        '/x87': {'name': 'UD_TAB__OPC_X87', 'size': 64},
        '/a': {'name': 'UD_TAB__OPC_ASIZE', 'size': 3},
        '/o': {'name': 'UD_TAB__OPC_OSIZE', 'size': 3},
        '/3dnow': {'name': 'UD_TAB__OPC_3DNOW', 'size': 256},
        'vendor': {'name': 'UD_TAB__OPC_VENDOR', 'size': 3},
    }

    # Root of the opcode table tree.
    OpcodeTable0 = {
        'type': 'opctbl',
        'entries': {},
        'meta': 'table0'
    }

    OpcExtIndex = {

        # ssef2, ssef3, sse66
        'sse': {
            'none': '00',
            'f2': '01',
            'f3': '02',
            '66': '03'
        },

        # /mod=
        'mod': {
            '!11': '00',
            '11': '01'
        },

        # /m=, /o=, /a=
        'mode': {
            '16': '00',
            '32': '01',
            '64': '02'
        },

        'vendor': {
            'amd': '00',
            'intel': '01',
            'any': '02'
        }
    }

    # Linear table of instruction forms, in the order they were added.
    InsnTable = []
    # Unique mnemonics, in order of first appearance.
    MnemonicsTable = []

    ThreeDNowTable = {}

    def sizeOfTable(self, t):
        """Return the number of slots of table type `t` (a TableInfo key)."""
        return self.TableInfo[t]['size']

    def nameOfTable(self, t):
        """Return the name of table type `t` (a TableInfo key)."""
        return self.TableInfo[t]['name']

    #
    # Updates a table entry: If the entry doesn't exist
    # it will create the entry, otherwise, it will walk
    # while validating the path.
    #
    def updateTable(self, table, index, type, meta):
        """Return the child of `table` at `index`, creating it if missing.

        table -- the parent node (a dict with an 'entries' dict).
        index -- the key of the child inside table['entries'].
        type  -- the node type the child must have.
        meta  -- stored as the 'meta' of a newly created child; ignored
                 when the child already exists.

        Raises NameError when a child already exists at `index` with a
        different type.
        """
        entries = table['entries']
        if index not in entries:
            entries[index] = {'type': type, 'entries': {}, 'meta': meta}
        if entries[index]['type'] != type:
            raise NameError("error: violation in opcode mapping (overwrite) %s with %s." %
                            (entries[index]['type'], type))
        return entries[index]

    class Insn:
        """An abstract type representing an instruction in the opcode map.
        """

        # A mapping of opcode extensions to their representational
        # values used in the opcode map.
        OpcExtMap = {
            '/rm': lambda v: "%02x" % int(v, 16),
            '/x87': lambda v: "%02x" % int(v, 16),
            '/3dnow': lambda v: "%02x" % int(v, 16),
            '/reg': lambda v: "%02x" % int(v, 16),
            # modrm.mod
            # (!11, 11) => (00, 01)
            '/mod': lambda v: '00' if v == '!11' else '01',
            # Mode extensions:
            # (16, 32, 64) => (00, 01, 02)
            # Floor division keeps the operand an integer on Python 3,
            # where "/" would produce a float that "%02x" rejects.
            '/o': lambda v: "%02x" % (int(v) // 32),
            '/a': lambda v: "%02x" % (int(v) // 32),
            '/m': lambda v: "%02x" % (int(v) // 32),
            '/sse': lambda v: UdOpcodeTables.OpcExtIndex['sse'][v]
        }

        def __init__(self, prefixes, mnemonic, opcodes, operands, vendor):
            """Decode an instruction definition.

            opcodes -- list of strings: an optional leading 'ssef2',
                       'ssef3' or 'sse66', then the opcode bytes as hex
                       strings, then opcode extensions written as
                       '<ext>=<value>' where <ext> is an OpcExtMap key.
                       The list is copied, not modified.

            The other arguments are stored unchanged.  After construction
            `nByteInsn` is 1, 2 or 3, `opcode` is the last opcode byte and
            `opcext` maps each extension to its table index string.
            """
            # Work on a copy: the sse marker is popped below and the
            # caller's list must not change as a side effect.
            self.opcodes = list(opcodes)
            self.prefixes = prefixes
            self.mnemonic = mnemonic
            self.operands = operands
            self.vendor = vendor
            self.opcext = {}

            ssePrefix = None
            if self.opcodes[0] in ('ssef2', 'ssef3', 'sse66'):
                # strip the leading 'sse' to get 'f2', 'f3' or '66'
                ssePrefix = self.opcodes[0][3:]
                self.opcodes.pop(0)

            # do some preliminary decoding of the instruction type
            # 1byte, 2byte or 3byte instruction?
            self.nByteInsn = 1
            if self.opcodes[0] == '0f':  # 2byte
                # 2+ byte opcodes are always disambiguated by an
                # sse prefix, unless it is a 3d now instruction
                # which is 0f 0f ...
                if self.opcodes[1] != '0f' and ssePrefix is None:
                    ssePrefix = 'none'
                if self.opcodes[1] in ('38', '3a'):  # 3byte
                    self.nByteInsn = 3
                else:
                    self.nByteInsn = 2

            # The opcode that indexes into the opcode table.
            self.opcode = self.opcodes[self.nByteInsn - 1]

            # Record opcode extensions
            for opcode in self.opcodes[self.nByteInsn:]:
                arg, val = opcode.split('=')
                self.opcext[arg] = self.OpcExtMap[arg](val)

            # Record sse extension: the reason sse extension is handled
            # separately is that historically sse was handled as a first
            # class opcode, not as an extension. Now that sse is handled
            # as an extension, we do the manual conversion here, as opposed
            # to modifying the opcode xml file.
            if ssePrefix is not None:
                self.opcext['/sse'] = self.OpcExtMap['/sse'](ssePrefix)

    def parse(self, table, insn):
        """Insert `insn` (an Insn) into the tree rooted at `table`.

        Walks from `table` through the opcode bytes, the opcode
        extensions and the optional vendor level, creating nodes as
        needed, and fills in the leaf.  Also appends the instruction to
        InsnTable and records its mnemonic in MnemonicsTable.

        Raises NameError (from updateTable) when the path collides with
        an existing node of a different type.
        """
        index = insn.opcodes[0]
        if insn.nByteInsn > 1:
            assert index == '0f'
            table = self.updateTable(table, index, 'opctbl', '0f')
            index = insn.opcodes[1]

            if insn.nByteInsn == 3:
                table = self.updateTable(table, index, 'opctbl', index)
                index = insn.opcodes[2]

        # Walk down the tree, create levels as needed, for opcode
        # extensions. The order is important, and determines how
        # well the opcode table is packed. Also note, /sse must be
        # before /o, because /sse may consume the operand size prefix
        # and affect the outcome of /o.
        for ext in ('/mod', '/x87', '/reg', '/rm', '/sse',
                    '/o', '/a', '/m', '/3dnow'):
            if ext in insn.opcext:
                table = self.updateTable(table, index, ext, ext)
                index = insn.opcext[ext]

        # additional table for disambiguating vendor
        if insn.vendor:
            table = self.updateTable(table, index, 'vendor', insn.vendor)
            index = self.OpcExtIndex['vendor'][insn.vendor]

        # make leaf node entries
        leaf = self.updateTable(table, index, 'insn', '')

        leaf['mnemonic'] = insn.mnemonic
        leaf['prefixes'] = insn.prefixes
        leaf['operands'] = insn.operands

        # add instruction to linear table of instruction forms
        self.InsnTable.append({'prefixes': insn.prefixes,
                               'mnemonic': insn.mnemonic,
                               'operands': insn.operands})

        # add mnemonic to mnemonic table
        if insn.mnemonic not in self.MnemonicsTable:
            self.MnemonicsTable.append(insn.mnemonic)

    # Adds an instruction definition to the opcode tables
    def addInsnDef(self, prefixes, mnemonic, opcodes, operands, vendor):
        """Build an Insn from the arguments and insert it under OpcodeTable0."""
        insn = self.Insn(prefixes=prefixes,
                         mnemonic=mnemonic,
                         opcodes=opcodes,
                         operands=operands,
                         vendor=vendor)
        self.parse(self.OpcodeTable0, insn)

    def print_table(self, table, pfxs):
        """Print the subtree rooted at `table` to stdout, one node per line.

        pfxs -- string prepended to every line; it grows by one level
                marker for each level of recursion.

        Entries are printed in sorted index order.
        """
        # A single parenthesised argument prints the same text on both
        # Python 2 and Python 3.
        print("%s |" % pfxs)
        entries = table['entries']
        for idx in sorted(entries):
            e = entries[idx]
            if e['type'] == 'insn':
                print("%s |-<%s> %s %s" %
                      (pfxs, idx, e['mnemonic'], ' '.join(e['operands'])))
            else:
                print("%s |-<%s> %s" % (pfxs, idx, e['type']))
                self.print_table(e, pfxs + ' |')

    def print_tree(self):
        """Print the whole opcode tree, starting at OpcodeTable0."""
        self.print_table(self.OpcodeTable0, '')