/* X86DisassemblerDecoder.h revision 218893 */
1/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the public interface of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#ifndef X86DISASSEMBLERDECODER_H 17#define X86DISASSEMBLERDECODER_H 18 19#ifdef __cplusplus 20extern "C" { 21#endif 22 23#define INSTRUCTION_SPECIFIER_FIELDS \ 24 const char* name; 25 26#define INSTRUCTION_IDS \ 27 const InstrUID *instructionIDs; 28 29#include "X86DisassemblerDecoderCommon.h" 30 31#undef INSTRUCTION_SPECIFIER_FIELDS 32#undef INSTRUCTION_IDS 33 34/* 35 * Accessor functions for various fields of an Intel instruction 36 */ 37#define modFromModRM(modRM) ((modRM & 0xc0) >> 6) 38#define regFromModRM(modRM) ((modRM & 0x38) >> 3) 39#define rmFromModRM(modRM) (modRM & 0x7) 40#define scaleFromSIB(sib) ((sib & 0xc0) >> 6) 41#define indexFromSIB(sib) ((sib & 0x38) >> 3) 42#define baseFromSIB(sib) (sib & 0x7) 43#define wFromREX(rex) ((rex & 0x8) >> 3) 44#define rFromREX(rex) ((rex & 0x4) >> 2) 45#define xFromREX(rex) ((rex & 0x2) >> 1) 46#define bFromREX(rex) (rex & 0x1) 47 48/* 49 * These enums represent Intel registers for use by the decoder. 
50 */ 51 52#define REGS_8BIT \ 53 ENTRY(AL) \ 54 ENTRY(CL) \ 55 ENTRY(DL) \ 56 ENTRY(BL) \ 57 ENTRY(AH) \ 58 ENTRY(CH) \ 59 ENTRY(DH) \ 60 ENTRY(BH) \ 61 ENTRY(R8B) \ 62 ENTRY(R9B) \ 63 ENTRY(R10B) \ 64 ENTRY(R11B) \ 65 ENTRY(R12B) \ 66 ENTRY(R13B) \ 67 ENTRY(R14B) \ 68 ENTRY(R15B) \ 69 ENTRY(SPL) \ 70 ENTRY(BPL) \ 71 ENTRY(SIL) \ 72 ENTRY(DIL) 73 74#define EA_BASES_16BIT \ 75 ENTRY(BX_SI) \ 76 ENTRY(BX_DI) \ 77 ENTRY(BP_SI) \ 78 ENTRY(BP_DI) \ 79 ENTRY(SI) \ 80 ENTRY(DI) \ 81 ENTRY(BP) \ 82 ENTRY(BX) \ 83 ENTRY(R8W) \ 84 ENTRY(R9W) \ 85 ENTRY(R10W) \ 86 ENTRY(R11W) \ 87 ENTRY(R12W) \ 88 ENTRY(R13W) \ 89 ENTRY(R14W) \ 90 ENTRY(R15W) 91 92#define REGS_16BIT \ 93 ENTRY(AX) \ 94 ENTRY(CX) \ 95 ENTRY(DX) \ 96 ENTRY(BX) \ 97 ENTRY(SP) \ 98 ENTRY(BP) \ 99 ENTRY(SI) \ 100 ENTRY(DI) \ 101 ENTRY(R8W) \ 102 ENTRY(R9W) \ 103 ENTRY(R10W) \ 104 ENTRY(R11W) \ 105 ENTRY(R12W) \ 106 ENTRY(R13W) \ 107 ENTRY(R14W) \ 108 ENTRY(R15W) 109 110#define EA_BASES_32BIT \ 111 ENTRY(EAX) \ 112 ENTRY(ECX) \ 113 ENTRY(EDX) \ 114 ENTRY(EBX) \ 115 ENTRY(sib) \ 116 ENTRY(EBP) \ 117 ENTRY(ESI) \ 118 ENTRY(EDI) \ 119 ENTRY(R8D) \ 120 ENTRY(R9D) \ 121 ENTRY(R10D) \ 122 ENTRY(R11D) \ 123 ENTRY(R12D) \ 124 ENTRY(R13D) \ 125 ENTRY(R14D) \ 126 ENTRY(R15D) 127 128#define REGS_32BIT \ 129 ENTRY(EAX) \ 130 ENTRY(ECX) \ 131 ENTRY(EDX) \ 132 ENTRY(EBX) \ 133 ENTRY(ESP) \ 134 ENTRY(EBP) \ 135 ENTRY(ESI) \ 136 ENTRY(EDI) \ 137 ENTRY(R8D) \ 138 ENTRY(R9D) \ 139 ENTRY(R10D) \ 140 ENTRY(R11D) \ 141 ENTRY(R12D) \ 142 ENTRY(R13D) \ 143 ENTRY(R14D) \ 144 ENTRY(R15D) 145 146#define EA_BASES_64BIT \ 147 ENTRY(RAX) \ 148 ENTRY(RCX) \ 149 ENTRY(RDX) \ 150 ENTRY(RBX) \ 151 ENTRY(sib64) \ 152 ENTRY(RBP) \ 153 ENTRY(RSI) \ 154 ENTRY(RDI) \ 155 ENTRY(R8) \ 156 ENTRY(R9) \ 157 ENTRY(R10) \ 158 ENTRY(R11) \ 159 ENTRY(R12) \ 160 ENTRY(R13) \ 161 ENTRY(R14) \ 162 ENTRY(R15) 163 164#define REGS_64BIT \ 165 ENTRY(RAX) \ 166 ENTRY(RCX) \ 167 ENTRY(RDX) \ 168 ENTRY(RBX) \ 169 ENTRY(RSP) \ 170 ENTRY(RBP) \ 171 ENTRY(RSI) \ 172 
ENTRY(RDI) \ 173 ENTRY(R8) \ 174 ENTRY(R9) \ 175 ENTRY(R10) \ 176 ENTRY(R11) \ 177 ENTRY(R12) \ 178 ENTRY(R13) \ 179 ENTRY(R14) \ 180 ENTRY(R15) 181 182#define REGS_MMX \ 183 ENTRY(MM0) \ 184 ENTRY(MM1) \ 185 ENTRY(MM2) \ 186 ENTRY(MM3) \ 187 ENTRY(MM4) \ 188 ENTRY(MM5) \ 189 ENTRY(MM6) \ 190 ENTRY(MM7) 191 192#define REGS_XMM \ 193 ENTRY(XMM0) \ 194 ENTRY(XMM1) \ 195 ENTRY(XMM2) \ 196 ENTRY(XMM3) \ 197 ENTRY(XMM4) \ 198 ENTRY(XMM5) \ 199 ENTRY(XMM6) \ 200 ENTRY(XMM7) \ 201 ENTRY(XMM8) \ 202 ENTRY(XMM9) \ 203 ENTRY(XMM10) \ 204 ENTRY(XMM11) \ 205 ENTRY(XMM12) \ 206 ENTRY(XMM13) \ 207 ENTRY(XMM14) \ 208 ENTRY(XMM15) 209 210#define REGS_SEGMENT \ 211 ENTRY(ES) \ 212 ENTRY(CS) \ 213 ENTRY(SS) \ 214 ENTRY(DS) \ 215 ENTRY(FS) \ 216 ENTRY(GS) 217 218#define REGS_DEBUG \ 219 ENTRY(DR0) \ 220 ENTRY(DR1) \ 221 ENTRY(DR2) \ 222 ENTRY(DR3) \ 223 ENTRY(DR4) \ 224 ENTRY(DR5) \ 225 ENTRY(DR6) \ 226 ENTRY(DR7) 227 228#define REGS_CONTROL \ 229 ENTRY(CR0) \ 230 ENTRY(CR1) \ 231 ENTRY(CR2) \ 232 ENTRY(CR3) \ 233 ENTRY(CR4) \ 234 ENTRY(CR5) \ 235 ENTRY(CR6) \ 236 ENTRY(CR7) \ 237 ENTRY(CR8) 238 239#define ALL_EA_BASES \ 240 EA_BASES_16BIT \ 241 EA_BASES_32BIT \ 242 EA_BASES_64BIT 243 244#define ALL_SIB_BASES \ 245 REGS_32BIT \ 246 REGS_64BIT 247 248#define ALL_REGS \ 249 REGS_8BIT \ 250 REGS_16BIT \ 251 REGS_32BIT \ 252 REGS_64BIT \ 253 REGS_MMX \ 254 REGS_XMM \ 255 REGS_SEGMENT \ 256 REGS_DEBUG \ 257 REGS_CONTROL \ 258 ENTRY(RIP) 259 260/* 261 * EABase - All possible values of the base field for effective-address 262 * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We 263 * distinguish between bases (EA_BASE_*) and registers that just happen to be 264 * referred to when Mod == 0b11 (EA_REG_*). 265 */ 266typedef enum { 267 EA_BASE_NONE, 268#define ENTRY(x) EA_BASE_##x, 269 ALL_EA_BASES 270#undef ENTRY 271#define ENTRY(x) EA_REG_##x, 272 ALL_REGS 273#undef ENTRY 274 EA_max 275} EABase; 276 277/* 278 * SIBIndex - All possible values of the SIB index field. 
279 * Borrows entries from ALL_EA_BASES with the special case that 280 * sib is synonymous with NONE. 281 */ 282typedef enum { 283 SIB_INDEX_NONE, 284#define ENTRY(x) SIB_INDEX_##x, 285 ALL_EA_BASES 286#undef ENTRY 287 SIB_INDEX_max 288} SIBIndex; 289 290/* 291 * SIBBase - All possible values of the SIB base field. 292 */ 293typedef enum { 294 SIB_BASE_NONE, 295#define ENTRY(x) SIB_BASE_##x, 296 ALL_SIB_BASES 297#undef ENTRY 298 SIB_BASE_max 299} SIBBase; 300 301/* 302 * EADisplacement - Possible displacement types for effective-address 303 * computations. 304 */ 305typedef enum { 306 EA_DISP_NONE, 307 EA_DISP_8, 308 EA_DISP_16, 309 EA_DISP_32 310} EADisplacement; 311 312/* 313 * Reg - All possible values of the reg field in the ModR/M byte. 314 */ 315typedef enum { 316#define ENTRY(x) MODRM_REG_##x, 317 ALL_REGS 318#undef ENTRY 319 MODRM_REG_max 320} Reg; 321 322/* 323 * SegmentOverride - All possible segment overrides. 324 */ 325typedef enum { 326 SEG_OVERRIDE_NONE, 327 SEG_OVERRIDE_CS, 328 SEG_OVERRIDE_SS, 329 SEG_OVERRIDE_DS, 330 SEG_OVERRIDE_ES, 331 SEG_OVERRIDE_FS, 332 SEG_OVERRIDE_GS, 333 SEG_OVERRIDE_max 334} SegmentOverride; 335 336typedef uint8_t BOOL; 337 338/* 339 * byteReader_t - Type for the byte reader that the consumer must provide to 340 * the decoder. Reads a single byte from the instruction's address space. 341 * @param arg - A baton that the consumer can associate with any internal 342 * state that it needs. 343 * @param byte - A pointer to a single byte in memory that should be set to 344 * contain the value at address. 345 * @param address - The address in the instruction's address space that should 346 * be read from. 347 * @return - -1 if the byte cannot be read for any reason; 0 otherwise. 348 */ 349typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); 350 351/* 352 * dlog_t - Type for the logging function that the consumer can provide to 353 * get debugging output from the decoder. 
354 * @param arg - A baton that the consumer can associate with any internal 355 * state that it needs. 356 * @param log - A string that contains the message. Will be reused after 357 * the logger returns. 358 */ 359typedef void (*dlog_t)(void* arg, const char *log); 360 361/* 362 * The x86 internal instruction, which is produced by the decoder. 363 */ 364struct InternalInstruction { 365 /* Reader interface (C) */ 366 byteReader_t reader; 367 /* Opaque value passed to the reader */ 368 void* readerArg; 369 /* The address of the next byte to read via the reader */ 370 uint64_t readerCursor; 371 372 /* Logger interface (C) */ 373 dlog_t dlog; 374 /* Opaque value passed to the logger */ 375 void* dlogArg; 376 377 /* General instruction information */ 378 379 /* The mode to disassemble for (64-bit, protected, real) */ 380 DisassemblerMode mode; 381 /* The start of the instruction, usable with the reader */ 382 uint64_t startLocation; 383 /* The length of the instruction, in bytes */ 384 size_t length; 385 386 /* Prefix state */ 387 388 /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ 389 uint8_t prefixPresent[0x100]; 390 /* contains the location (for use with the reader) of the prefix byte */ 391 uint64_t prefixLocations[0x100]; 392 /* The value of the REX prefix, if present */ 393 uint8_t rexPrefix; 394 /* The location of the REX prefix */ 395 uint64_t rexLocation; 396 /* The location where a mandatory prefix would have to be (i.e., right before 397 the opcode, or right before the REX prefix if one is present) */ 398 uint64_t necessaryPrefixLocation; 399 /* The segment override type */ 400 SegmentOverride segmentOverride; 401 402 /* Sizes of various critical pieces of data */ 403 uint8_t registerSize; 404 uint8_t addressSize; 405 uint8_t displacementSize; 406 uint8_t immediateSize; 407 408 /* opcode state */ 409 410 /* The value of the two-byte escape prefix (usually 0x0f) */ 411 uint8_t twoByteEscape; 412 /* The value of the three-byte 
escape prefix (usually 0x38 or 0x3a) */ 413 uint8_t threeByteEscape; 414 /* The last byte of the opcode, not counting any ModR/M extension */ 415 uint8_t opcode; 416 /* The ModR/M byte of the instruction, if it is an opcode extension */ 417 uint8_t modRMExtension; 418 419 /* decode state */ 420 421 /* The type of opcode, used for indexing into the array of decode tables */ 422 OpcodeType opcodeType; 423 /* The instruction ID, extracted from the decode table */ 424 uint16_t instructionID; 425 /* The specifier for the instruction, from the instruction info table */ 426 const struct InstructionSpecifier *spec; 427 428 /* state for additional bytes, consumed during operand decode. Pattern: 429 consumed___ indicates that the byte was already consumed and does not 430 need to be consumed again */ 431 432 /* The ModR/M byte, which contains most register operands and some portion of 433 all memory operands */ 434 BOOL consumedModRM; 435 uint8_t modRM; 436 437 /* The SIB byte, used for more complex 32- or 64-bit memory operands */ 438 BOOL consumedSIB; 439 uint8_t sib; 440 441 /* The displacement, used for memory operands */ 442 BOOL consumedDisplacement; 443 int32_t displacement; 444 445 /* Immediates. There can be two in some cases */ 446 uint8_t numImmediatesConsumed; 447 uint8_t numImmediatesTranslated; 448 uint64_t immediates[2]; 449 450 /* A register or immediate operand encoded into the opcode */ 451 BOOL consumedOpcodeModifier; 452 uint8_t opcodeModifier; 453 Reg opcodeRegister; 454 455 /* Portions of the ModR/M byte */ 456 457 /* These fields determine the allowable values for the ModR/M fields, which 458 depend on operand and address widths */ 459 EABase eaBaseBase; 460 EABase eaRegBase; 461 Reg regBase; 462 463 /* The Mod and R/M fields can encode a base for an effective address, or a 464 register. 
These are separated into two fields here */ 465 EABase eaBase; 466 EADisplacement eaDisplacement; 467 /* The reg field always encodes a register */ 468 Reg reg; 469 470 /* SIB state */ 471 SIBIndex sibIndex; 472 uint8_t sibScale; 473 SIBBase sibBase; 474}; 475 476/* decodeInstruction - Decode one instruction and store the decoding results in 477 * a buffer provided by the consumer. 478 * @param insn - The buffer to store the instruction in. Allocated by the 479 * consumer. 480 * @param reader - The byteReader_t for the bytes to be read. 481 * @param readerArg - An argument to pass to the reader for storing context 482 * specific to the consumer. May be NULL. 483 * @param logger - The dlog_t to be used in printing status messages from the 484 * disassembler. May be NULL. 485 * @param loggerArg - An argument to pass to the logger for storing context 486 * specific to the logger. May be NULL. 487 * @param startLoc - The address (in the reader's address space) of the first 488 * byte in the instruction. 489 * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in. 490 * @return - Nonzero if there was an error during decode, 0 otherwise. 491 */ 492int decodeInstruction(struct InternalInstruction* insn, 493 byteReader_t reader, 494 void* readerArg, 495 dlog_t logger, 496 void* loggerArg, 497 uint64_t startLoc, 498 DisassemblerMode mode); 499 500/* x86DisassemblerDebug - C-accessible function for printing a message to 501 * debugs() 502 * @param file - The name of the file printing the debug message. 503 * @param line - The line number that printed the debug message. 504 * @param s - The message to print. 505 */ 506 507void x86DisassemblerDebug(const char *file, 508 unsigned line, 509 const char *s); 510 511#ifdef __cplusplus 512} 513#endif 514 515#endif 516