1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28typedef int8_t bool; 29 30#ifndef NDEBUG 31#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32#else 33#define debug(s) do { } while (0) 34#endif 35 36 37/* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47} 48 49/* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88} 89 90/* 91 * decode - Reads the appropriate instruction table to obtain the unique ID of 92 * an instruction. 93 * 94 * @param type - See modRMRequired(). 95 * @param insnContext - See modRMRequired(). 96 * @param opcode - See modRMRequired(). 97 * @param modRM - The ModR/M byte if required, or any value if not. 98 * @return - The UID of the instruction, or 0 on failure. 99 */ 100static InstrUID decode(OpcodeType type, 101 InstructionContext insnContext, 102 uint8_t opcode, 103 uint8_t modRM) { 104 const struct ModRMDecision* dec = 0; 105 106 switch (type) { 107 case ONEBYTE: 108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 109 break; 110 case TWOBYTE: 111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 112 break; 113 case THREEBYTE_38: 114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 115 break; 116 case THREEBYTE_3A: 117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 118 break; 119 case THREEBYTE_A6: 120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 121 break; 122 case THREEBYTE_A7: 123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 124 break; 125 } 126 127 switch (dec->modrm_type) { 128 default: 129 debug("Corrupt table! Unknown modrm_type"); 130 return 0; 131 case MODRM_ONEENTRY: 132 return modRMTable[dec->instructionIDs]; 133 case MODRM_SPLITRM: 134 if (modFromModRM(modRM) == 0x3) 135 return modRMTable[dec->instructionIDs+1]; 136 return modRMTable[dec->instructionIDs]; 137 case MODRM_SPLITREG: 138 if (modFromModRM(modRM) == 0x3) 139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 141 case MODRM_SPLITMISC: 142 if (modFromModRM(modRM) == 0x3) 143 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 144 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 145 case MODRM_FULL: 146 return modRMTable[dec->instructionIDs+modRM]; 147 } 148} 149 150/* 151 * specifierForUID - Given a UID, returns the name and operand specification for 152 * that instruction. 153 * 154 * @param uid - The unique ID for the instruction. This should be returned by 155 * decode(); specifierForUID will not check bounds. 156 * @return - A pointer to the specification for that instruction. 157 */ 158static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 159 return &INSTRUCTIONS_SYM[uid]; 160} 161 162/* 163 * consumeByte - Uses the reader function provided by the user to consume one 164 * byte from the instruction's memory and advance the cursor. 165 * 166 * @param insn - The instruction with the reader function to use. The cursor 167 * for this instruction is advanced. 168 * @param byte - A pointer to a pre-allocated memory buffer to be populated 169 * with the data read. 170 * @return - 0 if the read was successful; nonzero otherwise. 171 */ 172static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 173 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 174 175 if (!ret) 176 ++(insn->readerCursor); 177 178 return ret; 179} 180 181/* 182 * lookAtByte - Like consumeByte, but does not advance the cursor. 183 * 184 * @param insn - See consumeByte(). 185 * @param byte - See consumeByte(). 186 * @return - See consumeByte(). 187 */ 188static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 189 return insn->reader(insn->readerArg, byte, insn->readerCursor); 190} 191 192static void unconsumeByte(struct InternalInstruction* insn) { 193 insn->readerCursor--; 194} 195 196#define CONSUME_FUNC(name, type) \ 197 static int name(struct InternalInstruction* insn, type* ptr) { \ 198 type combined = 0; \ 199 unsigned offset; \ 200 for (offset = 0; offset < sizeof(type); ++offset) { \ 201 uint8_t byte; \ 202 int ret = insn->reader(insn->readerArg, \ 203 &byte, \ 204 insn->readerCursor + offset); \ 205 if (ret) \ 206 return ret; \ 207 combined = combined | ((uint64_t)byte << (offset * 8)); \ 208 } \ 209 *ptr = combined; \ 210 insn->readerCursor += sizeof(type); \ 211 return 0; \ 212 } 213 214/* 215 * consume* - Use the reader function provided by the user to consume data 216 * values of various sizes from the instruction's memory and advance the 217 * cursor appropriately. These readers perform endian conversion. 218 * 219 * @param insn - See consumeByte(). 220 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 221 * be populated with the data read. 222 * @return - See consumeByte(). 223 */ 224CONSUME_FUNC(consumeInt8, int8_t) 225CONSUME_FUNC(consumeInt16, int16_t) 226CONSUME_FUNC(consumeInt32, int32_t) 227CONSUME_FUNC(consumeUInt16, uint16_t) 228CONSUME_FUNC(consumeUInt32, uint32_t) 229CONSUME_FUNC(consumeUInt64, uint64_t) 230 231/* 232 * dbgprintf - Uses the logging function provided by the user to log a single 233 * message, typically without a carriage-return. 234 * 235 * @param insn - The instruction containing the logging function. 236 * @param format - See printf(). 237 * @param ... - See printf(). 238 */ 239static void dbgprintf(struct InternalInstruction* insn, 240 const char* format, 241 ...) { 242 char buffer[256]; 243 va_list ap; 244 245 if (!insn->dlog) 246 return; 247 248 va_start(ap, format); 249 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 250 va_end(ap); 251 252 insn->dlog(insn->dlogArg, buffer); 253 254 return; 255} 256 257/* 258 * setPrefixPresent - Marks that a particular prefix is present at a particular 259 * location. 260 * 261 * @param insn - The instruction to be marked as having the prefix. 262 * @param prefix - The prefix that is present. 263 * @param location - The location where the prefix is located (in the address 264 * space of the instruction's reader). 265 */ 266static void setPrefixPresent(struct InternalInstruction* insn, 267 uint8_t prefix, 268 uint64_t location) 269{ 270 insn->prefixPresent[prefix] = 1; 271 insn->prefixLocations[prefix] = location; 272} 273 274/* 275 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 276 * present at a given location. 277 * 278 * @param insn - The instruction to be queried. 279 * @param prefix - The prefix. 280 * @param location - The location to query. 281 * @return - Whether the prefix is at that location. 282 */ 283static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 284 uint8_t prefix, 285 uint64_t location) 286{ 287 if (insn->prefixPresent[prefix] == 1 && 288 insn->prefixLocations[prefix] == location) 289 return TRUE; 290 else 291 return FALSE; 292} 293 294/* 295 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 296 * instruction as having them. Also sets the instruction's default operand, 297 * address, and other relevant data sizes to report operands correctly. 298 * 299 * @param insn - The instruction whose prefixes are to be read. 300 * @return - 0 if the instruction could be read until the end of the prefix 301 * bytes, and no prefixes conflicted; nonzero otherwise. 302 */ 303static int readPrefixes(struct InternalInstruction* insn) { 304 BOOL isPrefix = TRUE; 305 BOOL prefixGroups[4] = { FALSE }; 306 uint64_t prefixLocation; 307 uint8_t byte = 0; 308 309 BOOL hasAdSize = FALSE; 310 BOOL hasOpSize = FALSE; 311 312 dbgprintf(insn, "readPrefixes()"); 313 314 while (isPrefix) { 315 prefixLocation = insn->readerCursor; 316 317 if (consumeByte(insn, &byte)) 318 return -1; 319 320 /* 321 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 322 * break and let it be disassembled as a normal "instruction". 323 */ 324 if (insn->readerCursor - 1 == insn->startLocation 325 && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { 326 uint8_t nextByte; 327 if (byte == 0xf0) 328 break; 329 if (lookAtByte(insn, &nextByte)) 330 return -1; 331 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 332 if (consumeByte(insn, &nextByte)) 333 return -1; 334 if (lookAtByte(insn, &nextByte)) 335 return -1; 336 unconsumeByte(insn); 337 } 338 if (nextByte != 0x0f && nextByte != 0x90) 339 break; 340 } 341 342 switch (byte) { 343 case 0xf0: /* LOCK */ 344 case 0xf2: /* REPNE/REPNZ */ 345 case 0xf3: /* REP or REPE/REPZ */ 346 if (prefixGroups[0]) 347 dbgprintf(insn, "Redundant Group 1 prefix"); 348 prefixGroups[0] = TRUE; 349 setPrefixPresent(insn, byte, prefixLocation); 350 break; 351 case 0x2e: /* CS segment override -OR- Branch not taken */ 352 case 0x36: /* SS segment override -OR- Branch taken */ 353 case 0x3e: /* DS segment override */ 354 case 0x26: /* ES segment override */ 355 case 0x64: /* FS segment override */ 356 case 0x65: /* GS segment override */ 357 switch (byte) { 358 case 0x2e: 359 insn->segmentOverride = SEG_OVERRIDE_CS; 360 break; 361 case 0x36: 362 insn->segmentOverride = SEG_OVERRIDE_SS; 363 break; 364 case 0x3e: 365 insn->segmentOverride = SEG_OVERRIDE_DS; 366 break; 367 case 0x26: 368 insn->segmentOverride = SEG_OVERRIDE_ES; 369 break; 370 case 0x64: 371 insn->segmentOverride = SEG_OVERRIDE_FS; 372 break; 373 case 0x65: 374 insn->segmentOverride = SEG_OVERRIDE_GS; 375 break; 376 default: 377 debug("Unhandled override"); 378 return -1; 379 } 380 if (prefixGroups[1]) 381 dbgprintf(insn, "Redundant Group 2 prefix"); 382 prefixGroups[1] = TRUE; 383 setPrefixPresent(insn, byte, prefixLocation); 384 break; 385 case 0x66: /* Operand-size override */ 386 if (prefixGroups[2]) 387 dbgprintf(insn, "Redundant Group 3 prefix"); 388 prefixGroups[2] = TRUE; 389 hasOpSize = TRUE; 390 setPrefixPresent(insn, byte, prefixLocation); 391 break; 392 case 0x67: /* Address-size override */ 393 if (prefixGroups[3]) 394 dbgprintf(insn, "Redundant Group 4 prefix"); 395 prefixGroups[3] = TRUE; 396 hasAdSize = TRUE; 397 setPrefixPresent(insn, byte, prefixLocation); 398 break; 399 default: /* Not a prefix byte */ 400 isPrefix = FALSE; 401 break; 402 } 403 404 if (isPrefix) 405 dbgprintf(insn, "Found prefix 0x%hhx", byte); 406 } 407 408 insn->vexSize = 0; 409 410 if (byte == 0xc4) { 411 uint8_t byte1; 412 413 if (lookAtByte(insn, &byte1)) { 414 dbgprintf(insn, "Couldn't read second byte of VEX"); 415 return -1; 416 } 417 418 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 419 insn->vexSize = 3; 420 insn->necessaryPrefixLocation = insn->readerCursor - 1; 421 } 422 else { 423 unconsumeByte(insn); 424 insn->necessaryPrefixLocation = insn->readerCursor - 1; 425 } 426 427 if (insn->vexSize == 3) { 428 insn->vexPrefix[0] = byte; 429 consumeByte(insn, &insn->vexPrefix[1]); 430 consumeByte(insn, &insn->vexPrefix[2]); 431 432 /* We simulate the REX prefix for simplicity's sake */ 433 434 if (insn->mode == MODE_64BIT) { 435 insn->rexPrefix = 0x40 436 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 437 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 438 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 439 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 440 } 441 442 switch (ppFromVEX3of3(insn->vexPrefix[2])) 443 { 444 default: 445 break; 446 case VEX_PREFIX_66: 447 hasOpSize = TRUE; 448 break; 449 } 450 451 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 452 } 453 } 454 else if (byte == 0xc5) { 455 uint8_t byte1; 456 457 if (lookAtByte(insn, &byte1)) { 458 dbgprintf(insn, "Couldn't read second byte of VEX"); 459 return -1; 460 } 461 462 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 463 insn->vexSize = 2; 464 } 465 else { 466 unconsumeByte(insn); 467 } 468 469 if (insn->vexSize == 2) { 470 insn->vexPrefix[0] = byte; 471 consumeByte(insn, &insn->vexPrefix[1]); 472 473 if (insn->mode == MODE_64BIT) { 474 insn->rexPrefix = 0x40 475 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 476 } 477 478 switch (ppFromVEX2of2(insn->vexPrefix[1])) 479 { 480 default: 481 break; 482 case VEX_PREFIX_66: 483 hasOpSize = TRUE; 484 break; 485 } 486 487 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 488 } 489 } 490 else { 491 if (insn->mode == MODE_64BIT) { 492 if ((byte & 0xf0) == 0x40) { 493 uint8_t opcodeByte; 494 495 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 496 dbgprintf(insn, "Redundant REX prefix"); 497 return -1; 498 } 499 500 insn->rexPrefix = byte; 501 insn->necessaryPrefixLocation = insn->readerCursor - 2; 502 503 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 504 } else { 505 unconsumeByte(insn); 506 insn->necessaryPrefixLocation = insn->readerCursor - 1; 507 } 508 } else { 509 unconsumeByte(insn); 510 insn->necessaryPrefixLocation = insn->readerCursor - 1; 511 } 512 } 513 514 if (insn->mode == MODE_16BIT) { 515 insn->registerSize = (hasOpSize ? 4 : 2); 516 insn->addressSize = (hasAdSize ? 4 : 2); 517 insn->displacementSize = (hasAdSize ? 4 : 2); 518 insn->immediateSize = (hasOpSize ? 4 : 2); 519 } else if (insn->mode == MODE_32BIT) { 520 insn->registerSize = (hasOpSize ? 2 : 4); 521 insn->addressSize = (hasAdSize ? 2 : 4); 522 insn->displacementSize = (hasAdSize ? 2 : 4); 523 insn->immediateSize = (hasOpSize ? 2 : 4); 524 } else if (insn->mode == MODE_64BIT) { 525 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 526 insn->registerSize = 8; 527 insn->addressSize = (hasAdSize ? 4 : 8); 528 insn->displacementSize = 4; 529 insn->immediateSize = 4; 530 } else if (insn->rexPrefix) { 531 insn->registerSize = (hasOpSize ? 2 : 4); 532 insn->addressSize = (hasAdSize ? 4 : 8); 533 insn->displacementSize = (hasOpSize ? 2 : 4); 534 insn->immediateSize = (hasOpSize ? 2 : 4); 535 } else { 536 insn->registerSize = (hasOpSize ? 2 : 4); 537 insn->addressSize = (hasAdSize ? 4 : 8); 538 insn->displacementSize = (hasOpSize ? 2 : 4); 539 insn->immediateSize = (hasOpSize ? 2 : 4); 540 } 541 } 542 543 return 0; 544} 545 546/* 547 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 548 * extended or escape opcodes). 549 * 550 * @param insn - The instruction whose opcode is to be read. 551 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 552 */ 553static int readOpcode(struct InternalInstruction* insn) { 554 /* Determine the length of the primary opcode */ 555 556 uint8_t current; 557 558 dbgprintf(insn, "readOpcode()"); 559 560 insn->opcodeType = ONEBYTE; 561 562 if (insn->vexSize == 3) 563 { 564 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 565 { 566 default: 567 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 568 return -1; 569 case 0: 570 break; 571 case VEX_LOB_0F: 572 insn->twoByteEscape = 0x0f; 573 insn->opcodeType = TWOBYTE; 574 return consumeByte(insn, &insn->opcode); 575 case VEX_LOB_0F38: 576 insn->twoByteEscape = 0x0f; 577 insn->threeByteEscape = 0x38; 578 insn->opcodeType = THREEBYTE_38; 579 return consumeByte(insn, &insn->opcode); 580 case VEX_LOB_0F3A: 581 insn->twoByteEscape = 0x0f; 582 insn->threeByteEscape = 0x3a; 583 insn->opcodeType = THREEBYTE_3A; 584 return consumeByte(insn, &insn->opcode); 585 } 586 } 587 else if (insn->vexSize == 2) 588 { 589 insn->twoByteEscape = 0x0f; 590 insn->opcodeType = TWOBYTE; 591 return consumeByte(insn, &insn->opcode); 592 } 593 594 if (consumeByte(insn, ¤t)) 595 return -1; 596 597 if (current == 0x0f) { 598 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 599 600 insn->twoByteEscape = current; 601 602 if (consumeByte(insn, ¤t)) 603 return -1; 604 605 if (current == 0x38) { 606 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 607 608 insn->threeByteEscape = current; 609 610 if (consumeByte(insn, ¤t)) 611 return -1; 612 613 insn->opcodeType = THREEBYTE_38; 614 } else if (current == 0x3a) { 615 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 616 617 insn->threeByteEscape = current; 618 619 if (consumeByte(insn, ¤t)) 620 return -1; 621 622 insn->opcodeType = THREEBYTE_3A; 623 } else if (current == 0xa6) { 624 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 625 626 insn->threeByteEscape = current; 627 628 if (consumeByte(insn, ¤t)) 629 return -1; 630 631 insn->opcodeType = THREEBYTE_A6; 632 } else if (current == 0xa7) { 633 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 634 635 insn->threeByteEscape = current; 636 637 if (consumeByte(insn, ¤t)) 638 return -1; 639 640 insn->opcodeType = THREEBYTE_A7; 641 } else { 642 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 643 644 insn->opcodeType = TWOBYTE; 645 } 646 } 647 648 /* 649 * At this point we have consumed the full opcode. 650 * Anything we consume from here on must be unconsumed. 651 */ 652 653 insn->opcode = current; 654 655 return 0; 656} 657 658static int readModRM(struct InternalInstruction* insn); 659 660/* 661 * getIDWithAttrMask - Determines the ID of an instruction, consuming 662 * the ModR/M byte as appropriate for extended and escape opcodes, 663 * and using a supplied attribute mask. 664 * 665 * @param instructionID - A pointer whose target is filled in with the ID of the 666 * instruction. 667 * @param insn - The instruction whose ID is to be determined. 668 * @param attrMask - The attribute mask to search. 669 * @return - 0 if the ModR/M could be read when needed or was not 670 * needed; nonzero otherwise. 671 */ 672static int getIDWithAttrMask(uint16_t* instructionID, 673 struct InternalInstruction* insn, 674 uint8_t attrMask) { 675 BOOL hasModRMExtension; 676 677 uint8_t instructionClass; 678 679 instructionClass = contextForAttrs(attrMask); 680 681 hasModRMExtension = modRMRequired(insn->opcodeType, 682 instructionClass, 683 insn->opcode); 684 685 if (hasModRMExtension) { 686 if (readModRM(insn)) 687 return -1; 688 689 *instructionID = decode(insn->opcodeType, 690 instructionClass, 691 insn->opcode, 692 insn->modRM); 693 } else { 694 *instructionID = decode(insn->opcodeType, 695 instructionClass, 696 insn->opcode, 697 0); 698 } 699 700 return 0; 701} 702 703/* 704 * is16BitEquivalent - Determines whether two instruction names refer to 705 * equivalent instructions but one is 16-bit whereas the other is not. 706 * 707 * @param orig - The instruction that is not 16-bit 708 * @param equiv - The instruction that is 16-bit 709 */ 710static BOOL is16BitEquivalent(const char* orig, const char* equiv) { 711 off_t i; 712 713 for (i = 0;; i++) { 714 if (orig[i] == '\0' && equiv[i] == '\0') 715 return TRUE; 716 if (orig[i] == '\0' || equiv[i] == '\0') 717 return FALSE; 718 if (orig[i] != equiv[i]) { 719 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 720 continue; 721 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 722 continue; 723 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 724 continue; 725 return FALSE; 726 } 727 } 728} 729 730/* 731 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 732 * appropriate for extended and escape opcodes. Determines the attributes and 733 * context for the instruction before doing so. 734 * 735 * @param insn - The instruction whose ID is to be determined. 736 * @return - 0 if the ModR/M could be read when needed or was not needed; 737 * nonzero otherwise. 738 */ 739static int getID(struct InternalInstruction* insn, const void *miiArg) { 740 uint8_t attrMask; 741 uint16_t instructionID; 742 743 dbgprintf(insn, "getID()"); 744 745 attrMask = ATTR_NONE; 746 747 if (insn->mode == MODE_64BIT) 748 attrMask |= ATTR_64BIT; 749 750 if (insn->vexSize) { 751 attrMask |= ATTR_VEX; 752 753 if (insn->vexSize == 3) { 754 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 755 case VEX_PREFIX_66: 756 attrMask |= ATTR_OPSIZE; 757 break; 758 case VEX_PREFIX_F3: 759 attrMask |= ATTR_XS; 760 break; 761 case VEX_PREFIX_F2: 762 attrMask |= ATTR_XD; 763 break; 764 } 765 766 if (lFromVEX3of3(insn->vexPrefix[2])) 767 attrMask |= ATTR_VEXL; 768 } 769 else if (insn->vexSize == 2) { 770 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 771 case VEX_PREFIX_66: 772 attrMask |= ATTR_OPSIZE; 773 break; 774 case VEX_PREFIX_F3: 775 attrMask |= ATTR_XS; 776 break; 777 case VEX_PREFIX_F2: 778 attrMask |= ATTR_XD; 779 break; 780 } 781 782 if (lFromVEX2of2(insn->vexPrefix[1])) 783 attrMask |= ATTR_VEXL; 784 } 785 else { 786 return -1; 787 } 788 } 789 else { 790 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 791 attrMask |= ATTR_OPSIZE; 792 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 793 attrMask |= ATTR_ADSIZE; 794 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 795 attrMask |= ATTR_XS; 796 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 797 attrMask |= ATTR_XD; 798 } 799 800 if (insn->rexPrefix & 0x08) 801 attrMask |= ATTR_REXW; 802 803 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 804 return -1; 805 806 /* The following clauses compensate for limitations of the tables. */ 807 808 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 809 !(attrMask & ATTR_OPSIZE)) { 810 /* 811 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 812 * has precedence since there are no L-bit with W-bit entries in the tables. 813 * So if the L-bit isn't significant we should use the W-bit instead. 814 * We only need to do this if the instruction doesn't specify OpSize since 815 * there is a VEX_L_W_OPSIZE table. 816 */ 817 818 const struct InstructionSpecifier *spec; 819 uint16_t instructionIDWithWBit; 820 const struct InstructionSpecifier *specWithWBit; 821 822 spec = specifierForUID(instructionID); 823 824 if (getIDWithAttrMask(&instructionIDWithWBit, 825 insn, 826 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 827 insn->instructionID = instructionID; 828 insn->spec = spec; 829 return 0; 830 } 831 832 specWithWBit = specifierForUID(instructionIDWithWBit); 833 834 if (instructionID != instructionIDWithWBit) { 835 insn->instructionID = instructionIDWithWBit; 836 insn->spec = specWithWBit; 837 } else { 838 insn->instructionID = instructionID; 839 insn->spec = spec; 840 } 841 return 0; 842 } 843 844 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 845 /* 846 * The instruction tables make no distinction between instructions that 847 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 848 * particular spot (i.e., many MMX operations). In general we're 849 * conservative, but in the specific case where OpSize is present but not 850 * in the right place we check if there's a 16-bit operation. 851 */ 852 853 const struct InstructionSpecifier *spec; 854 uint16_t instructionIDWithOpsize; 855 const char *specName, *specWithOpSizeName; 856 857 spec = specifierForUID(instructionID); 858 859 if (getIDWithAttrMask(&instructionIDWithOpsize, 860 insn, 861 attrMask | ATTR_OPSIZE)) { 862 /* 863 * ModRM required with OpSize but not present; give up and return version 864 * without OpSize set 865 */ 866 867 insn->instructionID = instructionID; 868 insn->spec = spec; 869 return 0; 870 } 871 872 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 873 specWithOpSizeName = 874 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 875 876 if (is16BitEquivalent(specName, specWithOpSizeName)) { 877 insn->instructionID = instructionIDWithOpsize; 878 insn->spec = specifierForUID(instructionIDWithOpsize); 879 } else { 880 insn->instructionID = instructionID; 881 insn->spec = spec; 882 } 883 return 0; 884 } 885 886 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 887 insn->rexPrefix & 0x01) { 888 /* 889 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 890 * it should decode as XCHG %r8, %eax. 891 */ 892 893 const struct InstructionSpecifier *spec; 894 uint16_t instructionIDWithNewOpcode; 895 const struct InstructionSpecifier *specWithNewOpcode; 896 897 spec = specifierForUID(instructionID); 898 899 /* Borrow opcode from one of the other XCHGar opcodes */ 900 insn->opcode = 0x91; 901 902 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 903 insn, 904 attrMask)) { 905 insn->opcode = 0x90; 906 907 insn->instructionID = instructionID; 908 insn->spec = spec; 909 return 0; 910 } 911 912 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 913 914 /* Change back */ 915 insn->opcode = 0x90; 916 917 insn->instructionID = instructionIDWithNewOpcode; 918 insn->spec = specWithNewOpcode; 919 920 return 0; 921 } 922 923 insn->instructionID = instructionID; 924 insn->spec = specifierForUID(insn->instructionID); 925 926 return 0; 927} 928 929/* 930 * readSIB - Consumes the SIB byte to determine addressing information for an 931 * instruction. 932 * 933 * @param insn - The instruction whose SIB byte is to be read. 934 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 935 */ 936static int readSIB(struct InternalInstruction* insn) { 937 SIBIndex sibIndexBase = 0; 938 SIBBase sibBaseBase = 0; 939 uint8_t index, base; 940 941 dbgprintf(insn, "readSIB()"); 942 943 if (insn->consumedSIB) 944 return 0; 945 946 insn->consumedSIB = TRUE; 947 948 switch (insn->addressSize) { 949 case 2: 950 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 951 return -1; 952 break; 953 case 4: 954 sibIndexBase = SIB_INDEX_EAX; 955 sibBaseBase = SIB_BASE_EAX; 956 break; 957 case 8: 958 sibIndexBase = SIB_INDEX_RAX; 959 sibBaseBase = SIB_BASE_RAX; 960 break; 961 } 962 963 if (consumeByte(insn, &insn->sib)) 964 return -1; 965 966 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 967 968 switch (index) { 969 case 0x4: 970 insn->sibIndex = SIB_INDEX_NONE; 971 break; 972 default: 973 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 974 if (insn->sibIndex == SIB_INDEX_sib || 975 insn->sibIndex == SIB_INDEX_sib64) 976 insn->sibIndex = SIB_INDEX_NONE; 977 break; 978 } 979 980 switch (scaleFromSIB(insn->sib)) { 981 case 0: 982 insn->sibScale = 1; 983 break; 984 case 1: 985 insn->sibScale = 2; 986 break; 987 case 2: 988 insn->sibScale = 4; 989 break; 990 case 3: 991 insn->sibScale = 8; 992 break; 993 } 994 995 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 996 997 switch (base) { 998 case 0x5: 999 switch (modFromModRM(insn->modRM)) { 1000 case 0x0: 1001 insn->eaDisplacement = EA_DISP_32; 1002 insn->sibBase = SIB_BASE_NONE; 1003 break; 1004 case 0x1: 1005 insn->eaDisplacement = EA_DISP_8; 1006 insn->sibBase = (insn->addressSize == 4 ? 1007 SIB_BASE_EBP : SIB_BASE_RBP); 1008 break; 1009 case 0x2: 1010 insn->eaDisplacement = EA_DISP_32; 1011 insn->sibBase = (insn->addressSize == 4 ? 1012 SIB_BASE_EBP : SIB_BASE_RBP); 1013 break; 1014 case 0x3: 1015 debug("Cannot have Mod = 0b11 and a SIB byte"); 1016 return -1; 1017 } 1018 break; 1019 default: 1020 insn->sibBase = (SIBBase)(sibBaseBase + base); 1021 break; 1022 } 1023 1024 return 0; 1025} 1026 1027/* 1028 * readDisplacement - Consumes the displacement of an instruction. 1029 * 1030 * @param insn - The instruction whose displacement is to be read. 1031 * @return - 0 if the displacement byte was successfully read; nonzero 1032 * otherwise. 1033 */ 1034static int readDisplacement(struct InternalInstruction* insn) { 1035 int8_t d8; 1036 int16_t d16; 1037 int32_t d32; 1038 1039 dbgprintf(insn, "readDisplacement()"); 1040 1041 if (insn->consumedDisplacement) 1042 return 0; 1043 1044 insn->consumedDisplacement = TRUE; 1045 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1046 1047 switch (insn->eaDisplacement) { 1048 case EA_DISP_NONE: 1049 insn->consumedDisplacement = FALSE; 1050 break; 1051 case EA_DISP_8: 1052 if (consumeInt8(insn, &d8)) 1053 return -1; 1054 insn->displacement = d8; 1055 break; 1056 case EA_DISP_16: 1057 if (consumeInt16(insn, &d16)) 1058 return -1; 1059 insn->displacement = d16; 1060 break; 1061 case EA_DISP_32: 1062 if (consumeInt32(insn, &d32)) 1063 return -1; 1064 insn->displacement = d32; 1065 break; 1066 } 1067 1068 insn->consumedDisplacement = TRUE; 1069 return 0; 1070} 1071 1072/* 1073 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1074 * displacement) for an instruction and interprets it. 1075 * 1076 * @param insn - The instruction whose addressing information is to be read. 1077 * @return - 0 if the information was successfully read; nonzero otherwise. 1078 */ 1079static int readModRM(struct InternalInstruction* insn) { 1080 uint8_t mod, rm, reg; 1081 1082 dbgprintf(insn, "readModRM()"); 1083 1084 if (insn->consumedModRM) 1085 return 0; 1086 1087 if (consumeByte(insn, &insn->modRM)) 1088 return -1; 1089 insn->consumedModRM = TRUE; 1090 1091 mod = modFromModRM(insn->modRM); 1092 rm = rmFromModRM(insn->modRM); 1093 reg = regFromModRM(insn->modRM); 1094 1095 /* 1096 * This goes by insn->registerSize to pick the correct register, which messes 1097 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1098 * fixupReg(). 1099 */ 1100 switch (insn->registerSize) { 1101 case 2: 1102 insn->regBase = MODRM_REG_AX; 1103 insn->eaRegBase = EA_REG_AX; 1104 break; 1105 case 4: 1106 insn->regBase = MODRM_REG_EAX; 1107 insn->eaRegBase = EA_REG_EAX; 1108 break; 1109 case 8: 1110 insn->regBase = MODRM_REG_RAX; 1111 insn->eaRegBase = EA_REG_RAX; 1112 break; 1113 } 1114 1115 reg |= rFromREX(insn->rexPrefix) << 3; 1116 rm |= bFromREX(insn->rexPrefix) << 3; 1117 1118 insn->reg = (Reg)(insn->regBase + reg); 1119 1120 switch (insn->addressSize) { 1121 case 2: 1122 insn->eaBaseBase = EA_BASE_BX_SI; 1123 1124 switch (mod) { 1125 case 0x0: 1126 if (rm == 0x6) { 1127 insn->eaBase = EA_BASE_NONE; 1128 insn->eaDisplacement = EA_DISP_16; 1129 if (readDisplacement(insn)) 1130 return -1; 1131 } else { 1132 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1133 insn->eaDisplacement = EA_DISP_NONE; 1134 } 1135 break; 1136 case 0x1: 1137 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1138 insn->eaDisplacement = EA_DISP_8; 1139 if (readDisplacement(insn)) 1140 return -1; 1141 break; 1142 case 0x2: 1143 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1144 insn->eaDisplacement = EA_DISP_16; 1145 if (readDisplacement(insn)) 1146 return -1; 1147 break; 1148 case 0x3: 1149 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1150 if (readDisplacement(insn)) 1151 return -1; 1152 break; 1153 } 1154 break; 1155 case 4: 1156 case 8: 1157 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1158 1159 switch (mod) { 1160 case 0x0: 1161 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1162 switch (rm) { 1163 case 0x4: 1164 case 0xc: /* in case REXW.b is set */ 1165 insn->eaBase = (insn->addressSize == 4 ? 1166 EA_BASE_sib : EA_BASE_sib64); 1167 readSIB(insn); 1168 if (readDisplacement(insn)) 1169 return -1; 1170 break; 1171 case 0x5: 1172 insn->eaBase = EA_BASE_NONE; 1173 insn->eaDisplacement = EA_DISP_32; 1174 if (readDisplacement(insn)) 1175 return -1; 1176 break; 1177 default: 1178 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1179 break; 1180 } 1181 break; 1182 case 0x1: 1183 case 0x2: 1184 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1185 switch (rm) { 1186 case 0x4: 1187 case 0xc: /* in case REXW.b is set */ 1188 insn->eaBase = EA_BASE_sib; 1189 readSIB(insn); 1190 if (readDisplacement(insn)) 1191 return -1; 1192 break; 1193 default: 1194 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1195 if (readDisplacement(insn)) 1196 return -1; 1197 break; 1198 } 1199 break; 1200 case 0x3: 1201 insn->eaDisplacement = EA_DISP_NONE; 1202 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1203 break; 1204 } 1205 break; 1206 } /* switch (insn->addressSize) */ 1207 1208 return 0; 1209} 1210 1211#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1212 static uint8_t name(struct InternalInstruction *insn, \ 1213 OperandType type, \ 1214 uint8_t index, \ 1215 uint8_t *valid) { \ 1216 *valid = 1; \ 1217 switch (type) { \ 1218 default: \ 1219 debug("Unhandled register type"); \ 1220 *valid = 0; \ 1221 return 0; \ 1222 case TYPE_Rv: \ 1223 return base + index; \ 1224 case TYPE_R8: \ 1225 if (insn->rexPrefix && \ 1226 index >= 4 && index <= 7) { \ 1227 return prefix##_SPL + (index - 4); \ 1228 } else { \ 1229 return prefix##_AL + index; \ 1230 } \ 1231 case TYPE_R16: \ 1232 return prefix##_AX + index; \ 1233 case TYPE_R32: \ 1234 return prefix##_EAX + index; \ 1235 case TYPE_R64: \ 1236 return prefix##_RAX + index; \ 1237 case TYPE_XMM256: \ 1238 return prefix##_YMM0 + index; \ 1239 case TYPE_XMM128: \ 1240 case TYPE_XMM64: \ 1241 case TYPE_XMM32: \ 1242 case TYPE_XMM: \ 1243 return prefix##_XMM0 + index; \ 1244 case TYPE_MM64: \ 1245 case TYPE_MM32: \ 1246 case TYPE_MM: \ 1247 if (index > 7) \ 1248 *valid = 0; \ 1249 return prefix##_MM0 + index; \ 1250 case TYPE_SEGMENTREG: \ 1251 if (index > 5) \ 1252 *valid = 0; \ 1253 return prefix##_ES + index; \ 1254 case TYPE_DEBUGREG: \ 1255 if (index > 7) \ 1256 *valid = 0; \ 1257 return prefix##_DR0 + index; \ 1258 case TYPE_CONTROLREG: \ 1259 if (index > 8) \ 1260 *valid = 0; \ 1261 return prefix##_CR0 + index; \ 1262 } \ 1263 } 1264 1265/* 1266 * fixup*Value - Consults an operand type to determine the meaning of the 1267 * reg or R/M field. If the operand is an XMM operand, for example, an 1268 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1269 * misinterpret it as. 1270 * 1271 * @param insn - The instruction containing the operand. 1272 * @param type - The operand type. 1273 * @param index - The existing value of the field as reported by readModRM(). 1274 * @param valid - The address of a uint8_t. The target is set to 1 if the 1275 * field is valid for the register class; 0 if not. 1276 * @return - The proper value. 1277 */ 1278GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1279GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1280 1281/* 1282 * fixupReg - Consults an operand specifier to determine which of the 1283 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1284 * 1285 * @param insn - See fixup*Value(). 1286 * @param op - The operand specifier. 1287 * @return - 0 if fixup was successful; -1 if the register returned was 1288 * invalid for its class. 1289 */ 1290static int fixupReg(struct InternalInstruction *insn, 1291 const struct OperandSpecifier *op) { 1292 uint8_t valid; 1293 1294 dbgprintf(insn, "fixupReg()"); 1295 1296 switch ((OperandEncoding)op->encoding) { 1297 default: 1298 debug("Expected a REG or R/M encoding in fixupReg"); 1299 return -1; 1300 case ENCODING_VVVV: 1301 insn->vvvv = (Reg)fixupRegValue(insn, 1302 (OperandType)op->type, 1303 insn->vvvv, 1304 &valid); 1305 if (!valid) 1306 return -1; 1307 break; 1308 case ENCODING_REG: 1309 insn->reg = (Reg)fixupRegValue(insn, 1310 (OperandType)op->type, 1311 insn->reg - insn->regBase, 1312 &valid); 1313 if (!valid) 1314 return -1; 1315 break; 1316 case ENCODING_RM: 1317 if (insn->eaBase >= insn->eaRegBase) { 1318 insn->eaBase = (EABase)fixupRMValue(insn, 1319 (OperandType)op->type, 1320 insn->eaBase - insn->eaRegBase, 1321 &valid); 1322 if (!valid) 1323 return -1; 1324 } 1325 break; 1326 } 1327 1328 return 0; 1329} 1330 1331/* 1332 * readOpcodeModifier - Reads an operand from the opcode field of an 1333 * instruction. Handles AddRegFrm instructions. 1334 * 1335 * @param insn - The instruction whose opcode field is to be read. 1336 * @param inModRM - Indicates that the opcode field is to be read from the 1337 * ModR/M extension; useful for escape opcodes 1338 * @return - 0 on success; nonzero otherwise. 1339 */ 1340static int readOpcodeModifier(struct InternalInstruction* insn) { 1341 dbgprintf(insn, "readOpcodeModifier()"); 1342 1343 if (insn->consumedOpcodeModifier) 1344 return 0; 1345 1346 insn->consumedOpcodeModifier = TRUE; 1347 1348 switch (insn->spec->modifierType) { 1349 default: 1350 debug("Unknown modifier type."); 1351 return -1; 1352 case MODIFIER_NONE: 1353 debug("No modifier but an operand expects one."); 1354 return -1; 1355 case MODIFIER_OPCODE: 1356 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1357 return 0; 1358 case MODIFIER_MODRM: 1359 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1360 return 0; 1361 } 1362} 1363 1364/* 1365 * readOpcodeRegister - Reads an operand from the opcode field of an 1366 * instruction and interprets it appropriately given the operand width. 1367 * Handles AddRegFrm instructions. 1368 * 1369 * @param insn - See readOpcodeModifier(). 1370 * @param size - The width (in bytes) of the register being specified. 1371 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1372 * RAX. 1373 * @return - 0 on success; nonzero otherwise. 1374 */ 1375static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1376 dbgprintf(insn, "readOpcodeRegister()"); 1377 1378 if (readOpcodeModifier(insn)) 1379 return -1; 1380 1381 if (size == 0) 1382 size = insn->registerSize; 1383 1384 switch (size) { 1385 case 1: 1386 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1387 | insn->opcodeModifier)); 1388 if (insn->rexPrefix && 1389 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1390 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1391 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1392 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1393 } 1394 1395 break; 1396 case 2: 1397 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1398 + ((bFromREX(insn->rexPrefix) << 3) 1399 | insn->opcodeModifier)); 1400 break; 1401 case 4: 1402 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1403 + ((bFromREX(insn->rexPrefix) << 3) 1404 | insn->opcodeModifier)); 1405 break; 1406 case 8: 1407 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1408 + ((bFromREX(insn->rexPrefix) << 3) 1409 | insn->opcodeModifier)); 1410 break; 1411 } 1412 1413 return 0; 1414} 1415 1416/* 1417 * readImmediate - Consumes an immediate operand from an instruction, given the 1418 * desired operand size. 1419 * 1420 * @param insn - The instruction whose operand is to be read. 1421 * @param size - The width (in bytes) of the operand. 1422 * @return - 0 if the immediate was successfully consumed; nonzero 1423 * otherwise. 1424 */ 1425static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1426 uint8_t imm8; 1427 uint16_t imm16; 1428 uint32_t imm32; 1429 uint64_t imm64; 1430 1431 dbgprintf(insn, "readImmediate()"); 1432 1433 if (insn->numImmediatesConsumed == 2) { 1434 debug("Already consumed two immediates"); 1435 return -1; 1436 } 1437 1438 if (size == 0) 1439 size = insn->immediateSize; 1440 else 1441 insn->immediateSize = size; 1442 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1443 1444 switch (size) { 1445 case 1: 1446 if (consumeByte(insn, &imm8)) 1447 return -1; 1448 insn->immediates[insn->numImmediatesConsumed] = imm8; 1449 break; 1450 case 2: 1451 if (consumeUInt16(insn, &imm16)) 1452 return -1; 1453 insn->immediates[insn->numImmediatesConsumed] = imm16; 1454 break; 1455 case 4: 1456 if (consumeUInt32(insn, &imm32)) 1457 return -1; 1458 insn->immediates[insn->numImmediatesConsumed] = imm32; 1459 break; 1460 case 8: 1461 if (consumeUInt64(insn, &imm64)) 1462 return -1; 1463 insn->immediates[insn->numImmediatesConsumed] = imm64; 1464 break; 1465 } 1466 1467 insn->numImmediatesConsumed++; 1468 1469 return 0; 1470} 1471 1472/* 1473 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1474 * 1475 * @param insn - The instruction whose operand is to be read. 1476 * @return - 0 if the vvvv was successfully consumed; nonzero 1477 * otherwise. 1478 */ 1479static int readVVVV(struct InternalInstruction* insn) { 1480 dbgprintf(insn, "readVVVV()"); 1481 1482 if (insn->vexSize == 3) 1483 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1484 else if (insn->vexSize == 2) 1485 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1486 else 1487 return -1; 1488 1489 if (insn->mode != MODE_64BIT) 1490 insn->vvvv &= 0x7; 1491 1492 return 0; 1493} 1494 1495/* 1496 * readOperands - Consults the specifier for an instruction and consumes all 1497 * operands for that instruction, interpreting them as it goes. 1498 * 1499 * @param insn - The instruction whose operands are to be read and interpreted. 1500 * @return - 0 if all operands could be read; nonzero otherwise. 1501 */ 1502static int readOperands(struct InternalInstruction* insn) { 1503 int index; 1504 int hasVVVV, needVVVV; 1505 int sawRegImm = 0; 1506 1507 dbgprintf(insn, "readOperands()"); 1508 1509 /* If non-zero vvvv specified, need to make sure one of the operands 1510 uses it. */ 1511 hasVVVV = !readVVVV(insn); 1512 needVVVV = hasVVVV && (insn->vvvv != 0); 1513 1514 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1515 switch (x86OperandSets[insn->spec->operands][index].encoding) { 1516 case ENCODING_NONE: 1517 break; 1518 case ENCODING_REG: 1519 case ENCODING_RM: 1520 if (readModRM(insn)) 1521 return -1; 1522 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1523 return -1; 1524 break; 1525 case ENCODING_CB: 1526 case ENCODING_CW: 1527 case ENCODING_CD: 1528 case ENCODING_CP: 1529 case ENCODING_CO: 1530 case ENCODING_CT: 1531 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1532 return -1; 1533 case ENCODING_IB: 1534 if (sawRegImm) { 1535 /* Saw a register immediate so don't read again and instead split the 1536 previous immediate. FIXME: This is a hack. */ 1537 insn->immediates[insn->numImmediatesConsumed] = 1538 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1539 ++insn->numImmediatesConsumed; 1540 break; 1541 } 1542 if (readImmediate(insn, 1)) 1543 return -1; 1544 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1545 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1546 return -1; 1547 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1548 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1549 return -1; 1550 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1551 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1552 sawRegImm = 1; 1553 break; 1554 case ENCODING_IW: 1555 if (readImmediate(insn, 2)) 1556 return -1; 1557 break; 1558 case ENCODING_ID: 1559 if (readImmediate(insn, 4)) 1560 return -1; 1561 break; 1562 case ENCODING_IO: 1563 if (readImmediate(insn, 8)) 1564 return -1; 1565 break; 1566 case ENCODING_Iv: 1567 if (readImmediate(insn, insn->immediateSize)) 1568 return -1; 1569 break; 1570 case ENCODING_Ia: 1571 if (readImmediate(insn, insn->addressSize)) 1572 return -1; 1573 break; 1574 case ENCODING_RB: 1575 if (readOpcodeRegister(insn, 1)) 1576 return -1; 1577 break; 1578 case ENCODING_RW: 1579 if (readOpcodeRegister(insn, 2)) 1580 return -1; 1581 break; 1582 case ENCODING_RD: 1583 if (readOpcodeRegister(insn, 4)) 1584 return -1; 1585 break; 1586 case ENCODING_RO: 1587 if (readOpcodeRegister(insn, 8)) 1588 return -1; 1589 break; 1590 case ENCODING_Rv: 1591 if (readOpcodeRegister(insn, 0)) 1592 return -1; 1593 break; 1594 case ENCODING_I: 1595 if (readOpcodeModifier(insn)) 1596 return -1; 1597 break; 1598 case ENCODING_VVVV: 1599 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1600 if (!hasVVVV) 1601 return -1; 1602 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1603 return -1; 1604 break; 1605 case ENCODING_DUP: 1606 break; 1607 default: 1608 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1609 return -1; 1610 } 1611 } 1612 1613 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1614 if (needVVVV) return -1; 1615 1616 return 0; 1617} 1618 1619/* 1620 * decodeInstruction - Reads and interprets a full instruction provided by the 1621 * user. 1622 * 1623 * @param insn - A pointer to the instruction to be populated. Must be 1624 * pre-allocated. 1625 * @param reader - The function to be used to read the instruction's bytes. 1626 * @param readerArg - A generic argument to be passed to the reader to store 1627 * any internal state. 1628 * @param logger - If non-NULL, the function to be used to write log messages 1629 * and warnings. 1630 * @param loggerArg - A generic argument to be passed to the logger to store 1631 * any internal state. 1632 * @param startLoc - The address (in the reader's address space) of the first 1633 * byte in the instruction. 1634 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1635 * decode the instruction in. 1636 * @return - 0 if the instruction's memory could be read; nonzero if 1637 * not. 1638 */ 1639int decodeInstruction(struct InternalInstruction* insn, 1640 byteReader_t reader, 1641 const void* readerArg, 1642 dlog_t logger, 1643 void* loggerArg, 1644 const void* miiArg, 1645 uint64_t startLoc, 1646 DisassemblerMode mode) { 1647 memset(insn, 0, sizeof(struct InternalInstruction)); 1648 1649 insn->reader = reader; 1650 insn->readerArg = readerArg; 1651 insn->dlog = logger; 1652 insn->dlogArg = loggerArg; 1653 insn->startLocation = startLoc; 1654 insn->readerCursor = startLoc; 1655 insn->mode = mode; 1656 insn->numImmediatesConsumed = 0; 1657 1658 if (readPrefixes(insn) || 1659 readOpcode(insn) || 1660 getID(insn, miiArg) || 1661 insn->instructionID == 0 || 1662 readOperands(insn)) 1663 return -1; 1664 1665 insn->operands = &x86OperandSets[insn->spec->operands][0]; 1666 1667 insn->length = insn->readerCursor - insn->startLocation; 1668 1669 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1670 startLoc, insn->readerCursor, insn->length); 1671 1672 if (insn->length > 15) 1673 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1674 1675 return 0; 1676} 1677