1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28typedef int8_t bool; 29 30#ifndef NDEBUG 31#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 32#else 33#define debug(s) do { } while (0) 34#endif 35 36 37/* 38 * contextForAttrs - Client for the instruction context table. Takes a set of 39 * attributes and returns the appropriate decode context. 40 * 41 * @param attrMask - Attributes, from the enumeration attributeBits. 42 * @return - The InstructionContext to use when looking up an 43 * an instruction with these attributes. 44 */ 45static InstructionContext contextForAttrs(uint8_t attrMask) { 46 return CONTEXTS_SYM[attrMask]; 47} 48 49/* 50 * modRMRequired - Reads the appropriate instruction table to determine whether 51 * the ModR/M byte is required to decode a particular instruction. 52 * 53 * @param type - The opcode type (i.e., how many bytes it has). 54 * @param insnContext - The context for the instruction, as returned by 55 * contextForAttrs. 56 * @param opcode - The last byte of the instruction's opcode, not counting 57 * ModR/M extensions and escapes. 58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 59 */ 60static int modRMRequired(OpcodeType type, 61 InstructionContext insnContext, 62 uint8_t opcode) { 63 const struct ContextDecision* decision = 0; 64 65 switch (type) { 66 case ONEBYTE: 67 decision = &ONEBYTE_SYM; 68 break; 69 case TWOBYTE: 70 decision = &TWOBYTE_SYM; 71 break; 72 case THREEBYTE_38: 73 decision = &THREEBYTE38_SYM; 74 break; 75 case THREEBYTE_3A: 76 decision = &THREEBYTE3A_SYM; 77 break; 78 case THREEBYTE_A6: 79 decision = &THREEBYTEA6_SYM; 80 break; 81 case THREEBYTE_A7: 82 decision = &THREEBYTEA7_SYM; 83 break; 84 } 85 86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 87 modrm_type != MODRM_ONEENTRY; 88} 89 90/* 91 * decode - Reads the appropriate instruction table to obtain the unique ID of 92 * an instruction. 93 * 94 * @param type - See modRMRequired(). 95 * @param insnContext - See modRMRequired(). 96 * @param opcode - See modRMRequired(). 97 * @param modRM - The ModR/M byte if required, or any value if not. 98 * @return - The UID of the instruction, or 0 on failure. 99 */ 100static InstrUID decode(OpcodeType type, 101 InstructionContext insnContext, 102 uint8_t opcode, 103 uint8_t modRM) { 104 const struct ModRMDecision* dec = 0; 105 106 switch (type) { 107 case ONEBYTE: 108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 109 break; 110 case TWOBYTE: 111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 112 break; 113 case THREEBYTE_38: 114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 115 break; 116 case THREEBYTE_3A: 117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 118 break; 119 case THREEBYTE_A6: 120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 121 break; 122 case THREEBYTE_A7: 123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 124 break; 125 } 126 127 switch (dec->modrm_type) { 128 default: 129 debug("Corrupt table! Unknown modrm_type"); 130 return 0; 131 case MODRM_ONEENTRY: 132 return modRMTable[dec->instructionIDs]; 133 case MODRM_SPLITRM: 134 if (modFromModRM(modRM) == 0x3) 135 return modRMTable[dec->instructionIDs+1]; 136 return modRMTable[dec->instructionIDs]; 137 case MODRM_SPLITREG: 138 if (modFromModRM(modRM) == 0x3) 139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 141 case MODRM_SPLITMISC: 142 if (modFromModRM(modRM) == 0x3) 143 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 144 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 145 case MODRM_FULL: 146 return modRMTable[dec->instructionIDs+modRM]; 147 } 148} 149 150/* 151 * specifierForUID - Given a UID, returns the name and operand specification for 152 * that instruction. 153 * 154 * @param uid - The unique ID for the instruction. This should be returned by 155 * decode(); specifierForUID will not check bounds. 156 * @return - A pointer to the specification for that instruction. 157 */ 158static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 159 return &INSTRUCTIONS_SYM[uid]; 160} 161 162/* 163 * consumeByte - Uses the reader function provided by the user to consume one 164 * byte from the instruction's memory and advance the cursor. 165 * 166 * @param insn - The instruction with the reader function to use. The cursor 167 * for this instruction is advanced. 168 * @param byte - A pointer to a pre-allocated memory buffer to be populated 169 * with the data read. 170 * @return - 0 if the read was successful; nonzero otherwise. 171 */ 172static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 173 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 174 175 if (!ret) 176 ++(insn->readerCursor); 177 178 return ret; 179} 180 181/* 182 * lookAtByte - Like consumeByte, but does not advance the cursor. 183 * 184 * @param insn - See consumeByte(). 185 * @param byte - See consumeByte(). 186 * @return - See consumeByte(). 187 */ 188static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 189 return insn->reader(insn->readerArg, byte, insn->readerCursor); 190} 191 192static void unconsumeByte(struct InternalInstruction* insn) { 193 insn->readerCursor--; 194} 195 196#define CONSUME_FUNC(name, type) \ 197 static int name(struct InternalInstruction* insn, type* ptr) { \ 198 type combined = 0; \ 199 unsigned offset; \ 200 for (offset = 0; offset < sizeof(type); ++offset) { \ 201 uint8_t byte; \ 202 int ret = insn->reader(insn->readerArg, \ 203 &byte, \ 204 insn->readerCursor + offset); \ 205 if (ret) \ 206 return ret; \ 207 combined = combined | ((uint64_t)byte << (offset * 8)); \ 208 } \ 209 *ptr = combined; \ 210 insn->readerCursor += sizeof(type); \ 211 return 0; \ 212 } 213 214/* 215 * consume* - Use the reader function provided by the user to consume data 216 * values of various sizes from the instruction's memory and advance the 217 * cursor appropriately. These readers perform endian conversion. 218 * 219 * @param insn - See consumeByte(). 220 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 221 * be populated with the data read. 222 * @return - See consumeByte(). 223 */ 224CONSUME_FUNC(consumeInt8, int8_t) 225CONSUME_FUNC(consumeInt16, int16_t) 226CONSUME_FUNC(consumeInt32, int32_t) 227CONSUME_FUNC(consumeUInt16, uint16_t) 228CONSUME_FUNC(consumeUInt32, uint32_t) 229CONSUME_FUNC(consumeUInt64, uint64_t) 230 231/* 232 * dbgprintf - Uses the logging function provided by the user to log a single 233 * message, typically without a carriage-return. 234 * 235 * @param insn - The instruction containing the logging function. 236 * @param format - See printf(). 237 * @param ... - See printf(). 238 */ 239static void dbgprintf(struct InternalInstruction* insn, 240 const char* format, 241 ...) { 242 char buffer[256]; 243 va_list ap; 244 245 if (!insn->dlog) 246 return; 247 248 va_start(ap, format); 249 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 250 va_end(ap); 251 252 insn->dlog(insn->dlogArg, buffer); 253 254 return; 255} 256 257/* 258 * setPrefixPresent - Marks that a particular prefix is present at a particular 259 * location. 260 * 261 * @param insn - The instruction to be marked as having the prefix. 262 * @param prefix - The prefix that is present. 263 * @param location - The location where the prefix is located (in the address 264 * space of the instruction's reader). 265 */ 266static void setPrefixPresent(struct InternalInstruction* insn, 267 uint8_t prefix, 268 uint64_t location) 269{ 270 insn->prefixPresent[prefix] = 1; 271 insn->prefixLocations[prefix] = location; 272} 273 274/* 275 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 276 * present at a given location. 277 * 278 * @param insn - The instruction to be queried. 279 * @param prefix - The prefix. 280 * @param location - The location to query. 281 * @return - Whether the prefix is at that location. 282 */ 283static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 284 uint8_t prefix, 285 uint64_t location) 286{ 287 if (insn->prefixPresent[prefix] == 1 && 288 insn->prefixLocations[prefix] == location) 289 return TRUE; 290 else 291 return FALSE; 292} 293 294/* 295 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 296 * instruction as having them. Also sets the instruction's default operand, 297 * address, and other relevant data sizes to report operands correctly. 298 * 299 * @param insn - The instruction whose prefixes are to be read. 300 * @return - 0 if the instruction could be read until the end of the prefix 301 * bytes, and no prefixes conflicted; nonzero otherwise. 302 */ 303static int readPrefixes(struct InternalInstruction* insn) { 304 BOOL isPrefix = TRUE; 305 BOOL prefixGroups[4] = { FALSE }; 306 uint64_t prefixLocation; 307 uint8_t byte = 0; 308 309 BOOL hasAdSize = FALSE; 310 BOOL hasOpSize = FALSE; 311 312 dbgprintf(insn, "readPrefixes()"); 313 314 while (isPrefix) { 315 prefixLocation = insn->readerCursor; 316 317 if (consumeByte(insn, &byte)) 318 return -1; 319 320 /* 321 * If the first byte is a LOCK prefix break and let it be disassembled 322 * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. 323 * FIXME there is currently no way to get the disassembler to print the 324 * lock prefix if it is not the first byte. 325 */ 326 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 327 break; 328 329 switch (byte) { 330 case 0xf0: /* LOCK */ 331 case 0xf2: /* REPNE/REPNZ */ 332 case 0xf3: /* REP or REPE/REPZ */ 333 if (prefixGroups[0]) 334 dbgprintf(insn, "Redundant Group 1 prefix"); 335 prefixGroups[0] = TRUE; 336 setPrefixPresent(insn, byte, prefixLocation); 337 break; 338 case 0x2e: /* CS segment override -OR- Branch not taken */ 339 case 0x36: /* SS segment override -OR- Branch taken */ 340 case 0x3e: /* DS segment override */ 341 case 0x26: /* ES segment override */ 342 case 0x64: /* FS segment override */ 343 case 0x65: /* GS segment override */ 344 switch (byte) { 345 case 0x2e: 346 insn->segmentOverride = SEG_OVERRIDE_CS; 347 break; 348 case 0x36: 349 insn->segmentOverride = SEG_OVERRIDE_SS; 350 break; 351 case 0x3e: 352 insn->segmentOverride = SEG_OVERRIDE_DS; 353 break; 354 case 0x26: 355 insn->segmentOverride = SEG_OVERRIDE_ES; 356 break; 357 case 0x64: 358 insn->segmentOverride = SEG_OVERRIDE_FS; 359 break; 360 case 0x65: 361 insn->segmentOverride = SEG_OVERRIDE_GS; 362 break; 363 default: 364 debug("Unhandled override"); 365 return -1; 366 } 367 if (prefixGroups[1]) 368 dbgprintf(insn, "Redundant Group 2 prefix"); 369 prefixGroups[1] = TRUE; 370 setPrefixPresent(insn, byte, prefixLocation); 371 break; 372 case 0x66: /* Operand-size override */ 373 if (prefixGroups[2]) 374 dbgprintf(insn, "Redundant Group 3 prefix"); 375 prefixGroups[2] = TRUE; 376 hasOpSize = TRUE; 377 setPrefixPresent(insn, byte, prefixLocation); 378 break; 379 case 0x67: /* Address-size override */ 380 if (prefixGroups[3]) 381 dbgprintf(insn, "Redundant Group 4 prefix"); 382 prefixGroups[3] = TRUE; 383 hasAdSize = TRUE; 384 setPrefixPresent(insn, byte, prefixLocation); 385 break; 386 default: /* Not a prefix byte */ 387 isPrefix = FALSE; 388 break; 389 } 390 391 if (isPrefix) 392 dbgprintf(insn, "Found prefix 0x%hhx", byte); 393 } 394 395 insn->vexSize = 0; 396 397 if (byte == 0xc4) { 398 uint8_t byte1; 399 400 if (lookAtByte(insn, &byte1)) { 401 dbgprintf(insn, "Couldn't read second byte of VEX"); 402 return -1; 403 } 404 405 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 406 insn->vexSize = 3; 407 insn->necessaryPrefixLocation = insn->readerCursor - 1; 408 } 409 else { 410 unconsumeByte(insn); 411 insn->necessaryPrefixLocation = insn->readerCursor - 1; 412 } 413 414 if (insn->vexSize == 3) { 415 insn->vexPrefix[0] = byte; 416 consumeByte(insn, &insn->vexPrefix[1]); 417 consumeByte(insn, &insn->vexPrefix[2]); 418 419 /* We simulate the REX prefix for simplicity's sake */ 420 421 if (insn->mode == MODE_64BIT) { 422 insn->rexPrefix = 0x40 423 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) 424 | (rFromVEX2of3(insn->vexPrefix[1]) << 2) 425 | (xFromVEX2of3(insn->vexPrefix[1]) << 1) 426 | (bFromVEX2of3(insn->vexPrefix[1]) << 0); 427 } 428 429 switch (ppFromVEX3of3(insn->vexPrefix[2])) 430 { 431 default: 432 break; 433 case VEX_PREFIX_66: 434 hasOpSize = TRUE; 435 break; 436 } 437 438 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); 439 } 440 } 441 else if (byte == 0xc5) { 442 uint8_t byte1; 443 444 if (lookAtByte(insn, &byte1)) { 445 dbgprintf(insn, "Couldn't read second byte of VEX"); 446 return -1; 447 } 448 449 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 450 insn->vexSize = 2; 451 } 452 else { 453 unconsumeByte(insn); 454 } 455 456 if (insn->vexSize == 2) { 457 insn->vexPrefix[0] = byte; 458 consumeByte(insn, &insn->vexPrefix[1]); 459 460 if (insn->mode == MODE_64BIT) { 461 insn->rexPrefix = 0x40 462 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); 463 } 464 465 switch (ppFromVEX2of2(insn->vexPrefix[1])) 466 { 467 default: 468 break; 469 case VEX_PREFIX_66: 470 hasOpSize = TRUE; 471 break; 472 } 473 474 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); 475 } 476 } 477 else { 478 if (insn->mode == MODE_64BIT) { 479 if ((byte & 0xf0) == 0x40) { 480 uint8_t opcodeByte; 481 482 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 483 dbgprintf(insn, "Redundant REX prefix"); 484 return -1; 485 } 486 487 insn->rexPrefix = byte; 488 insn->necessaryPrefixLocation = insn->readerCursor - 2; 489 490 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 491 } else { 492 unconsumeByte(insn); 493 insn->necessaryPrefixLocation = insn->readerCursor - 1; 494 } 495 } else { 496 unconsumeByte(insn); 497 insn->necessaryPrefixLocation = insn->readerCursor - 1; 498 } 499 } 500 501 if (insn->mode == MODE_16BIT) { 502 insn->registerSize = (hasOpSize ? 4 : 2); 503 insn->addressSize = (hasAdSize ? 4 : 2); 504 insn->displacementSize = (hasAdSize ? 4 : 2); 505 insn->immediateSize = (hasOpSize ? 4 : 2); 506 } else if (insn->mode == MODE_32BIT) { 507 insn->registerSize = (hasOpSize ? 2 : 4); 508 insn->addressSize = (hasAdSize ? 2 : 4); 509 insn->displacementSize = (hasAdSize ? 2 : 4); 510 insn->immediateSize = (hasOpSize ? 2 : 4); 511 } else if (insn->mode == MODE_64BIT) { 512 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 513 insn->registerSize = 8; 514 insn->addressSize = (hasAdSize ? 4 : 8); 515 insn->displacementSize = 4; 516 insn->immediateSize = 4; 517 } else if (insn->rexPrefix) { 518 insn->registerSize = (hasOpSize ? 2 : 4); 519 insn->addressSize = (hasAdSize ? 4 : 8); 520 insn->displacementSize = (hasOpSize ? 2 : 4); 521 insn->immediateSize = (hasOpSize ? 2 : 4); 522 } else { 523 insn->registerSize = (hasOpSize ? 2 : 4); 524 insn->addressSize = (hasAdSize ? 4 : 8); 525 insn->displacementSize = (hasOpSize ? 2 : 4); 526 insn->immediateSize = (hasOpSize ? 2 : 4); 527 } 528 } 529 530 return 0; 531} 532 533/* 534 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 535 * extended or escape opcodes). 536 * 537 * @param insn - The instruction whose opcode is to be read. 538 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 539 */ 540static int readOpcode(struct InternalInstruction* insn) { 541 /* Determine the length of the primary opcode */ 542 543 uint8_t current; 544 545 dbgprintf(insn, "readOpcode()"); 546 547 insn->opcodeType = ONEBYTE; 548 549 if (insn->vexSize == 3) 550 { 551 switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) 552 { 553 default: 554 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); 555 return -1; 556 case 0: 557 break; 558 case VEX_LOB_0F: 559 insn->twoByteEscape = 0x0f; 560 insn->opcodeType = TWOBYTE; 561 return consumeByte(insn, &insn->opcode); 562 case VEX_LOB_0F38: 563 insn->twoByteEscape = 0x0f; 564 insn->threeByteEscape = 0x38; 565 insn->opcodeType = THREEBYTE_38; 566 return consumeByte(insn, &insn->opcode); 567 case VEX_LOB_0F3A: 568 insn->twoByteEscape = 0x0f; 569 insn->threeByteEscape = 0x3a; 570 insn->opcodeType = THREEBYTE_3A; 571 return consumeByte(insn, &insn->opcode); 572 } 573 } 574 else if (insn->vexSize == 2) 575 { 576 insn->twoByteEscape = 0x0f; 577 insn->opcodeType = TWOBYTE; 578 return consumeByte(insn, &insn->opcode); 579 } 580 581 if (consumeByte(insn, ¤t)) 582 return -1; 583 584 if (current == 0x0f) { 585 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 586 587 insn->twoByteEscape = current; 588 589 if (consumeByte(insn, ¤t)) 590 return -1; 591 592 if (current == 0x38) { 593 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 594 595 insn->threeByteEscape = current; 596 597 if (consumeByte(insn, ¤t)) 598 return -1; 599 600 insn->opcodeType = THREEBYTE_38; 601 } else if (current == 0x3a) { 602 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 603 604 insn->threeByteEscape = current; 605 606 if (consumeByte(insn, ¤t)) 607 return -1; 608 609 insn->opcodeType = THREEBYTE_3A; 610 } else if (current == 0xa6) { 611 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 612 613 insn->threeByteEscape = current; 614 615 if (consumeByte(insn, ¤t)) 616 return -1; 617 618 insn->opcodeType = THREEBYTE_A6; 619 } else if (current == 0xa7) { 620 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 621 622 insn->threeByteEscape = current; 623 624 if (consumeByte(insn, ¤t)) 625 return -1; 626 627 insn->opcodeType = THREEBYTE_A7; 628 } else { 629 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 630 631 insn->opcodeType = TWOBYTE; 632 } 633 } 634 635 /* 636 * At this point we have consumed the full opcode. 637 * Anything we consume from here on must be unconsumed. 638 */ 639 640 insn->opcode = current; 641 642 return 0; 643} 644 645static int readModRM(struct InternalInstruction* insn); 646 647/* 648 * getIDWithAttrMask - Determines the ID of an instruction, consuming 649 * the ModR/M byte as appropriate for extended and escape opcodes, 650 * and using a supplied attribute mask. 651 * 652 * @param instructionID - A pointer whose target is filled in with the ID of the 653 * instruction. 654 * @param insn - The instruction whose ID is to be determined. 655 * @param attrMask - The attribute mask to search. 656 * @return - 0 if the ModR/M could be read when needed or was not 657 * needed; nonzero otherwise. 658 */ 659static int getIDWithAttrMask(uint16_t* instructionID, 660 struct InternalInstruction* insn, 661 uint8_t attrMask) { 662 BOOL hasModRMExtension; 663 664 uint8_t instructionClass; 665 666 instructionClass = contextForAttrs(attrMask); 667 668 hasModRMExtension = modRMRequired(insn->opcodeType, 669 instructionClass, 670 insn->opcode); 671 672 if (hasModRMExtension) { 673 if (readModRM(insn)) 674 return -1; 675 676 *instructionID = decode(insn->opcodeType, 677 instructionClass, 678 insn->opcode, 679 insn->modRM); 680 } else { 681 *instructionID = decode(insn->opcodeType, 682 instructionClass, 683 insn->opcode, 684 0); 685 } 686 687 return 0; 688} 689 690/* 691 * is16BitEquivalent - Determines whether two instruction names refer to 692 * equivalent instructions but one is 16-bit whereas the other is not. 693 * 694 * @param orig - The instruction that is not 16-bit 695 * @param equiv - The instruction that is 16-bit 696 */ 697static BOOL is16BitEquvalent(const char* orig, const char* equiv) { 698 off_t i; 699 700 for (i = 0;; i++) { 701 if (orig[i] == '\0' && equiv[i] == '\0') 702 return TRUE; 703 if (orig[i] == '\0' || equiv[i] == '\0') 704 return FALSE; 705 if (orig[i] != equiv[i]) { 706 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 707 continue; 708 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 709 continue; 710 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 711 continue; 712 return FALSE; 713 } 714 } 715} 716 717/* 718 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 719 * appropriate for extended and escape opcodes. Determines the attributes and 720 * context for the instruction before doing so. 721 * 722 * @param insn - The instruction whose ID is to be determined. 723 * @return - 0 if the ModR/M could be read when needed or was not needed; 724 * nonzero otherwise. 725 */ 726static int getID(struct InternalInstruction* insn, const void *miiArg) { 727 uint8_t attrMask; 728 uint16_t instructionID; 729 730 dbgprintf(insn, "getID()"); 731 732 attrMask = ATTR_NONE; 733 734 if (insn->mode == MODE_64BIT) 735 attrMask |= ATTR_64BIT; 736 737 if (insn->vexSize) { 738 attrMask |= ATTR_VEX; 739 740 if (insn->vexSize == 3) { 741 switch (ppFromVEX3of3(insn->vexPrefix[2])) { 742 case VEX_PREFIX_66: 743 attrMask |= ATTR_OPSIZE; 744 break; 745 case VEX_PREFIX_F3: 746 attrMask |= ATTR_XS; 747 break; 748 case VEX_PREFIX_F2: 749 attrMask |= ATTR_XD; 750 break; 751 } 752 753 if (lFromVEX3of3(insn->vexPrefix[2])) 754 attrMask |= ATTR_VEXL; 755 } 756 else if (insn->vexSize == 2) { 757 switch (ppFromVEX2of2(insn->vexPrefix[1])) { 758 case VEX_PREFIX_66: 759 attrMask |= ATTR_OPSIZE; 760 break; 761 case VEX_PREFIX_F3: 762 attrMask |= ATTR_XS; 763 break; 764 case VEX_PREFIX_F2: 765 attrMask |= ATTR_XD; 766 break; 767 } 768 769 if (lFromVEX2of2(insn->vexPrefix[1])) 770 attrMask |= ATTR_VEXL; 771 } 772 else { 773 return -1; 774 } 775 } 776 else { 777 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 778 attrMask |= ATTR_OPSIZE; 779 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 780 attrMask |= ATTR_ADSIZE; 781 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 782 attrMask |= ATTR_XS; 783 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 784 attrMask |= ATTR_XD; 785 } 786 787 if (insn->rexPrefix & 0x08) 788 attrMask |= ATTR_REXW; 789 790 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 791 return -1; 792 793 /* The following clauses compensate for limitations of the tables. */ 794 795 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && 796 !(attrMask & ATTR_OPSIZE)) { 797 /* 798 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit 799 * has precedence since there are no L-bit with W-bit entries in the tables. 800 * So if the L-bit isn't significant we should use the W-bit instead. 801 * We only need to do this if the instruction doesn't specify OpSize since 802 * there is a VEX_L_W_OPSIZE table. 803 */ 804 805 const struct InstructionSpecifier *spec; 806 uint16_t instructionIDWithWBit; 807 const struct InstructionSpecifier *specWithWBit; 808 809 spec = specifierForUID(instructionID); 810 811 if (getIDWithAttrMask(&instructionIDWithWBit, 812 insn, 813 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { 814 insn->instructionID = instructionID; 815 insn->spec = spec; 816 return 0; 817 } 818 819 specWithWBit = specifierForUID(instructionIDWithWBit); 820 821 if (instructionID != instructionIDWithWBit) { 822 insn->instructionID = instructionIDWithWBit; 823 insn->spec = specWithWBit; 824 } else { 825 insn->instructionID = instructionID; 826 insn->spec = spec; 827 } 828 return 0; 829 } 830 831 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 832 /* 833 * The instruction tables make no distinction between instructions that 834 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 835 * particular spot (i.e., many MMX operations). In general we're 836 * conservative, but in the specific case where OpSize is present but not 837 * in the right place we check if there's a 16-bit operation. 838 */ 839 840 const struct InstructionSpecifier *spec; 841 uint16_t instructionIDWithOpsize; 842 const char *specName, *specWithOpSizeName; 843 844 spec = specifierForUID(instructionID); 845 846 if (getIDWithAttrMask(&instructionIDWithOpsize, 847 insn, 848 attrMask | ATTR_OPSIZE)) { 849 /* 850 * ModRM required with OpSize but not present; give up and return version 851 * without OpSize set 852 */ 853 854 insn->instructionID = instructionID; 855 insn->spec = spec; 856 return 0; 857 } 858 859 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 860 specWithOpSizeName = 861 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 862 863 if (is16BitEquvalent(specName, specWithOpSizeName)) { 864 insn->instructionID = instructionIDWithOpsize; 865 insn->spec = specifierForUID(instructionIDWithOpsize); 866 } else { 867 insn->instructionID = instructionID; 868 insn->spec = spec; 869 } 870 return 0; 871 } 872 873 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 874 insn->rexPrefix & 0x01) { 875 /* 876 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 877 * it should decode as XCHG %r8, %eax. 878 */ 879 880 const struct InstructionSpecifier *spec; 881 uint16_t instructionIDWithNewOpcode; 882 const struct InstructionSpecifier *specWithNewOpcode; 883 884 spec = specifierForUID(instructionID); 885 886 /* Borrow opcode from one of the other XCHGar opcodes */ 887 insn->opcode = 0x91; 888 889 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 890 insn, 891 attrMask)) { 892 insn->opcode = 0x90; 893 894 insn->instructionID = instructionID; 895 insn->spec = spec; 896 return 0; 897 } 898 899 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 900 901 /* Change back */ 902 insn->opcode = 0x90; 903 904 insn->instructionID = instructionIDWithNewOpcode; 905 insn->spec = specWithNewOpcode; 906 907 return 0; 908 } 909 910 insn->instructionID = instructionID; 911 insn->spec = specifierForUID(insn->instructionID); 912 913 return 0; 914} 915 916/* 917 * readSIB - Consumes the SIB byte to determine addressing information for an 918 * instruction. 919 * 920 * @param insn - The instruction whose SIB byte is to be read. 921 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 922 */ 923static int readSIB(struct InternalInstruction* insn) { 924 SIBIndex sibIndexBase = 0; 925 SIBBase sibBaseBase = 0; 926 uint8_t index, base; 927 928 dbgprintf(insn, "readSIB()"); 929 930 if (insn->consumedSIB) 931 return 0; 932 933 insn->consumedSIB = TRUE; 934 935 switch (insn->addressSize) { 936 case 2: 937 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 938 return -1; 939 break; 940 case 4: 941 sibIndexBase = SIB_INDEX_EAX; 942 sibBaseBase = SIB_BASE_EAX; 943 break; 944 case 8: 945 sibIndexBase = SIB_INDEX_RAX; 946 sibBaseBase = SIB_BASE_RAX; 947 break; 948 } 949 950 if (consumeByte(insn, &insn->sib)) 951 return -1; 952 953 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 954 955 switch (index) { 956 case 0x4: 957 insn->sibIndex = SIB_INDEX_NONE; 958 break; 959 default: 960 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 961 if (insn->sibIndex == SIB_INDEX_sib || 962 insn->sibIndex == SIB_INDEX_sib64) 963 insn->sibIndex = SIB_INDEX_NONE; 964 break; 965 } 966 967 switch (scaleFromSIB(insn->sib)) { 968 case 0: 969 insn->sibScale = 1; 970 break; 971 case 1: 972 insn->sibScale = 2; 973 break; 974 case 2: 975 insn->sibScale = 4; 976 break; 977 case 3: 978 insn->sibScale = 8; 979 break; 980 } 981 982 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 983 984 switch (base) { 985 case 0x5: 986 switch (modFromModRM(insn->modRM)) { 987 case 0x0: 988 insn->eaDisplacement = EA_DISP_32; 989 insn->sibBase = SIB_BASE_NONE; 990 break; 991 case 0x1: 992 insn->eaDisplacement = EA_DISP_8; 993 insn->sibBase = (insn->addressSize == 4 ? 994 SIB_BASE_EBP : SIB_BASE_RBP); 995 break; 996 case 0x2: 997 insn->eaDisplacement = EA_DISP_32; 998 insn->sibBase = (insn->addressSize == 4 ? 999 SIB_BASE_EBP : SIB_BASE_RBP); 1000 break; 1001 case 0x3: 1002 debug("Cannot have Mod = 0b11 and a SIB byte"); 1003 return -1; 1004 } 1005 break; 1006 default: 1007 insn->sibBase = (SIBBase)(sibBaseBase + base); 1008 break; 1009 } 1010 1011 return 0; 1012} 1013 1014/* 1015 * readDisplacement - Consumes the displacement of an instruction. 1016 * 1017 * @param insn - The instruction whose displacement is to be read. 1018 * @return - 0 if the displacement byte was successfully read; nonzero 1019 * otherwise. 1020 */ 1021static int readDisplacement(struct InternalInstruction* insn) { 1022 int8_t d8; 1023 int16_t d16; 1024 int32_t d32; 1025 1026 dbgprintf(insn, "readDisplacement()"); 1027 1028 if (insn->consumedDisplacement) 1029 return 0; 1030 1031 insn->consumedDisplacement = TRUE; 1032 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1033 1034 switch (insn->eaDisplacement) { 1035 case EA_DISP_NONE: 1036 insn->consumedDisplacement = FALSE; 1037 break; 1038 case EA_DISP_8: 1039 if (consumeInt8(insn, &d8)) 1040 return -1; 1041 insn->displacement = d8; 1042 break; 1043 case EA_DISP_16: 1044 if (consumeInt16(insn, &d16)) 1045 return -1; 1046 insn->displacement = d16; 1047 break; 1048 case EA_DISP_32: 1049 if (consumeInt32(insn, &d32)) 1050 return -1; 1051 insn->displacement = d32; 1052 break; 1053 } 1054 1055 insn->consumedDisplacement = TRUE; 1056 return 0; 1057} 1058 1059/* 1060 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1061 * displacement) for an instruction and interprets it. 1062 * 1063 * @param insn - The instruction whose addressing information is to be read. 1064 * @return - 0 if the information was successfully read; nonzero otherwise. 1065 */ 1066static int readModRM(struct InternalInstruction* insn) { 1067 uint8_t mod, rm, reg; 1068 1069 dbgprintf(insn, "readModRM()"); 1070 1071 if (insn->consumedModRM) 1072 return 0; 1073 1074 if (consumeByte(insn, &insn->modRM)) 1075 return -1; 1076 insn->consumedModRM = TRUE; 1077 1078 mod = modFromModRM(insn->modRM); 1079 rm = rmFromModRM(insn->modRM); 1080 reg = regFromModRM(insn->modRM); 1081 1082 /* 1083 * This goes by insn->registerSize to pick the correct register, which messes 1084 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1085 * fixupReg(). 1086 */ 1087 switch (insn->registerSize) { 1088 case 2: 1089 insn->regBase = MODRM_REG_AX; 1090 insn->eaRegBase = EA_REG_AX; 1091 break; 1092 case 4: 1093 insn->regBase = MODRM_REG_EAX; 1094 insn->eaRegBase = EA_REG_EAX; 1095 break; 1096 case 8: 1097 insn->regBase = MODRM_REG_RAX; 1098 insn->eaRegBase = EA_REG_RAX; 1099 break; 1100 } 1101 1102 reg |= rFromREX(insn->rexPrefix) << 3; 1103 rm |= bFromREX(insn->rexPrefix) << 3; 1104 1105 insn->reg = (Reg)(insn->regBase + reg); 1106 1107 switch (insn->addressSize) { 1108 case 2: 1109 insn->eaBaseBase = EA_BASE_BX_SI; 1110 1111 switch (mod) { 1112 case 0x0: 1113 if (rm == 0x6) { 1114 insn->eaBase = EA_BASE_NONE; 1115 insn->eaDisplacement = EA_DISP_16; 1116 if (readDisplacement(insn)) 1117 return -1; 1118 } else { 1119 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1120 insn->eaDisplacement = EA_DISP_NONE; 1121 } 1122 break; 1123 case 0x1: 1124 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1125 insn->eaDisplacement = EA_DISP_8; 1126 if (readDisplacement(insn)) 1127 return -1; 1128 break; 1129 case 0x2: 1130 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1131 insn->eaDisplacement = EA_DISP_16; 1132 if (readDisplacement(insn)) 1133 return -1; 1134 break; 1135 case 0x3: 1136 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1137 if (readDisplacement(insn)) 1138 return -1; 1139 break; 1140 } 1141 break; 1142 case 4: 1143 case 8: 1144 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1145 1146 switch (mod) { 1147 case 0x0: 1148 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1149 switch (rm) { 1150 case 0x4: 1151 case 0xc: /* in case REXW.b is set */ 1152 insn->eaBase = (insn->addressSize == 4 ? 1153 EA_BASE_sib : EA_BASE_sib64); 1154 readSIB(insn); 1155 if (readDisplacement(insn)) 1156 return -1; 1157 break; 1158 case 0x5: 1159 insn->eaBase = EA_BASE_NONE; 1160 insn->eaDisplacement = EA_DISP_32; 1161 if (readDisplacement(insn)) 1162 return -1; 1163 break; 1164 default: 1165 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1166 break; 1167 } 1168 break; 1169 case 0x1: 1170 case 0x2: 1171 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1172 switch (rm) { 1173 case 0x4: 1174 case 0xc: /* in case REXW.b is set */ 1175 insn->eaBase = EA_BASE_sib; 1176 readSIB(insn); 1177 if (readDisplacement(insn)) 1178 return -1; 1179 break; 1180 default: 1181 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1182 if (readDisplacement(insn)) 1183 return -1; 1184 break; 1185 } 1186 break; 1187 case 0x3: 1188 insn->eaDisplacement = EA_DISP_NONE; 1189 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1190 break; 1191 } 1192 break; 1193 } /* switch (insn->addressSize) */ 1194 1195 return 0; 1196} 1197 1198#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1199 static uint8_t name(struct InternalInstruction *insn, \ 1200 OperandType type, \ 1201 uint8_t index, \ 1202 uint8_t *valid) { \ 1203 *valid = 1; \ 1204 switch (type) { \ 1205 default: \ 1206 debug("Unhandled register type"); \ 1207 *valid = 0; \ 1208 return 0; \ 1209 case TYPE_Rv: \ 1210 return base + index; \ 1211 case TYPE_R8: \ 1212 if (insn->rexPrefix && \ 1213 index >= 4 && index <= 7) { \ 1214 return prefix##_SPL + (index - 4); \ 1215 } else { \ 1216 return prefix##_AL + index; \ 1217 } \ 1218 case TYPE_R16: \ 1219 return prefix##_AX + index; \ 1220 case TYPE_R32: \ 1221 return prefix##_EAX + index; \ 1222 case TYPE_R64: \ 1223 return prefix##_RAX + index; \ 1224 case TYPE_XMM256: \ 1225 return prefix##_YMM0 + index; \ 1226 case TYPE_XMM128: \ 1227 case TYPE_XMM64: \ 1228 case TYPE_XMM32: \ 1229 case TYPE_XMM: \ 1230 return prefix##_XMM0 + index; \ 1231 case TYPE_MM64: \ 1232 case TYPE_MM32: \ 1233 case TYPE_MM: \ 1234 if (index > 7) \ 1235 *valid = 0; \ 1236 return prefix##_MM0 + index; \ 1237 case TYPE_SEGMENTREG: \ 1238 if (index > 5) \ 1239 *valid = 0; \ 1240 return prefix##_ES + index; \ 1241 case TYPE_DEBUGREG: \ 1242 if (index > 7) \ 1243 *valid = 0; \ 1244 return prefix##_DR0 + index; \ 1245 case TYPE_CONTROLREG: \ 1246 if (index > 8) \ 1247 *valid = 0; \ 1248 return prefix##_CR0 + index; \ 1249 } \ 1250 } 1251 1252/* 1253 * fixup*Value - Consults an operand type to determine the meaning of the 1254 * reg or R/M field. If the operand is an XMM operand, for example, an 1255 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1256 * misinterpret it as. 1257 * 1258 * @param insn - The instruction containing the operand. 1259 * @param type - The operand type. 1260 * @param index - The existing value of the field as reported by readModRM(). 1261 * @param valid - The address of a uint8_t. The target is set to 1 if the 1262 * field is valid for the register class; 0 if not. 1263 * @return - The proper value. 1264 */ 1265GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1266GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1267 1268/* 1269 * fixupReg - Consults an operand specifier to determine which of the 1270 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1271 * 1272 * @param insn - See fixup*Value(). 1273 * @param op - The operand specifier. 1274 * @return - 0 if fixup was successful; -1 if the register returned was 1275 * invalid for its class. 1276 */ 1277static int fixupReg(struct InternalInstruction *insn, 1278 const struct OperandSpecifier *op) { 1279 uint8_t valid; 1280 1281 dbgprintf(insn, "fixupReg()"); 1282 1283 switch ((OperandEncoding)op->encoding) { 1284 default: 1285 debug("Expected a REG or R/M encoding in fixupReg"); 1286 return -1; 1287 case ENCODING_VVVV: 1288 insn->vvvv = (Reg)fixupRegValue(insn, 1289 (OperandType)op->type, 1290 insn->vvvv, 1291 &valid); 1292 if (!valid) 1293 return -1; 1294 break; 1295 case ENCODING_REG: 1296 insn->reg = (Reg)fixupRegValue(insn, 1297 (OperandType)op->type, 1298 insn->reg - insn->regBase, 1299 &valid); 1300 if (!valid) 1301 return -1; 1302 break; 1303 case ENCODING_RM: 1304 if (insn->eaBase >= insn->eaRegBase) { 1305 insn->eaBase = (EABase)fixupRMValue(insn, 1306 (OperandType)op->type, 1307 insn->eaBase - insn->eaRegBase, 1308 &valid); 1309 if (!valid) 1310 return -1; 1311 } 1312 break; 1313 } 1314 1315 return 0; 1316} 1317 1318/* 1319 * readOpcodeModifier - Reads an operand from the opcode field of an 1320 * instruction. Handles AddRegFrm instructions. 1321 * 1322 * @param insn - The instruction whose opcode field is to be read. 1323 * @param inModRM - Indicates that the opcode field is to be read from the 1324 * ModR/M extension; useful for escape opcodes 1325 * @return - 0 on success; nonzero otherwise. 1326 */ 1327static int readOpcodeModifier(struct InternalInstruction* insn) { 1328 dbgprintf(insn, "readOpcodeModifier()"); 1329 1330 if (insn->consumedOpcodeModifier) 1331 return 0; 1332 1333 insn->consumedOpcodeModifier = TRUE; 1334 1335 switch (insn->spec->modifierType) { 1336 default: 1337 debug("Unknown modifier type."); 1338 return -1; 1339 case MODIFIER_NONE: 1340 debug("No modifier but an operand expects one."); 1341 return -1; 1342 case MODIFIER_OPCODE: 1343 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1344 return 0; 1345 case MODIFIER_MODRM: 1346 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1347 return 0; 1348 } 1349} 1350 1351/* 1352 * readOpcodeRegister - Reads an operand from the opcode field of an 1353 * instruction and interprets it appropriately given the operand width. 1354 * Handles AddRegFrm instructions. 1355 * 1356 * @param insn - See readOpcodeModifier(). 1357 * @param size - The width (in bytes) of the register being specified. 1358 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1359 * RAX. 1360 * @return - 0 on success; nonzero otherwise. 1361 */ 1362static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1363 dbgprintf(insn, "readOpcodeRegister()"); 1364 1365 if (readOpcodeModifier(insn)) 1366 return -1; 1367 1368 if (size == 0) 1369 size = insn->registerSize; 1370 1371 switch (size) { 1372 case 1: 1373 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1374 | insn->opcodeModifier)); 1375 if (insn->rexPrefix && 1376 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1377 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1378 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1379 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1380 } 1381 1382 break; 1383 case 2: 1384 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1385 + ((bFromREX(insn->rexPrefix) << 3) 1386 | insn->opcodeModifier)); 1387 break; 1388 case 4: 1389 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1390 + ((bFromREX(insn->rexPrefix) << 3) 1391 | insn->opcodeModifier)); 1392 break; 1393 case 8: 1394 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1395 + ((bFromREX(insn->rexPrefix) << 3) 1396 | insn->opcodeModifier)); 1397 break; 1398 } 1399 1400 return 0; 1401} 1402 1403/* 1404 * readImmediate - Consumes an immediate operand from an instruction, given the 1405 * desired operand size. 1406 * 1407 * @param insn - The instruction whose operand is to be read. 1408 * @param size - The width (in bytes) of the operand. 1409 * @return - 0 if the immediate was successfully consumed; nonzero 1410 * otherwise. 1411 */ 1412static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1413 uint8_t imm8; 1414 uint16_t imm16; 1415 uint32_t imm32; 1416 uint64_t imm64; 1417 1418 dbgprintf(insn, "readImmediate()"); 1419 1420 if (insn->numImmediatesConsumed == 2) { 1421 debug("Already consumed two immediates"); 1422 return -1; 1423 } 1424 1425 if (size == 0) 1426 size = insn->immediateSize; 1427 else 1428 insn->immediateSize = size; 1429 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1430 1431 switch (size) { 1432 case 1: 1433 if (consumeByte(insn, &imm8)) 1434 return -1; 1435 insn->immediates[insn->numImmediatesConsumed] = imm8; 1436 break; 1437 case 2: 1438 if (consumeUInt16(insn, &imm16)) 1439 return -1; 1440 insn->immediates[insn->numImmediatesConsumed] = imm16; 1441 break; 1442 case 4: 1443 if (consumeUInt32(insn, &imm32)) 1444 return -1; 1445 insn->immediates[insn->numImmediatesConsumed] = imm32; 1446 break; 1447 case 8: 1448 if (consumeUInt64(insn, &imm64)) 1449 return -1; 1450 insn->immediates[insn->numImmediatesConsumed] = imm64; 1451 break; 1452 } 1453 1454 insn->numImmediatesConsumed++; 1455 1456 return 0; 1457} 1458 1459/* 1460 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1461 * 1462 * @param insn - The instruction whose operand is to be read. 1463 * @return - 0 if the vvvv was successfully consumed; nonzero 1464 * otherwise. 1465 */ 1466static int readVVVV(struct InternalInstruction* insn) { 1467 dbgprintf(insn, "readVVVV()"); 1468 1469 if (insn->vexSize == 3) 1470 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); 1471 else if (insn->vexSize == 2) 1472 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); 1473 else 1474 return -1; 1475 1476 if (insn->mode != MODE_64BIT) 1477 insn->vvvv &= 0x7; 1478 1479 return 0; 1480} 1481 1482/* 1483 * readOperands - Consults the specifier for an instruction and consumes all 1484 * operands for that instruction, interpreting them as it goes. 1485 * 1486 * @param insn - The instruction whose operands are to be read and interpreted. 1487 * @return - 0 if all operands could be read; nonzero otherwise. 1488 */ 1489static int readOperands(struct InternalInstruction* insn) { 1490 int index; 1491 int hasVVVV, needVVVV; 1492 int sawRegImm = 0; 1493 1494 dbgprintf(insn, "readOperands()"); 1495 1496 /* If non-zero vvvv specified, need to make sure one of the operands 1497 uses it. */ 1498 hasVVVV = !readVVVV(insn); 1499 needVVVV = hasVVVV && (insn->vvvv != 0); 1500 1501 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1502 switch (x86OperandSets[insn->spec->operands][index].encoding) { 1503 case ENCODING_NONE: 1504 break; 1505 case ENCODING_REG: 1506 case ENCODING_RM: 1507 if (readModRM(insn)) 1508 return -1; 1509 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1510 return -1; 1511 break; 1512 case ENCODING_CB: 1513 case ENCODING_CW: 1514 case ENCODING_CD: 1515 case ENCODING_CP: 1516 case ENCODING_CO: 1517 case ENCODING_CT: 1518 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1519 return -1; 1520 case ENCODING_IB: 1521 if (sawRegImm) { 1522 /* Saw a register immediate so don't read again and instead split the 1523 previous immediate. FIXME: This is a hack. */ 1524 insn->immediates[insn->numImmediatesConsumed] = 1525 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1526 ++insn->numImmediatesConsumed; 1527 break; 1528 } 1529 if (readImmediate(insn, 1)) 1530 return -1; 1531 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1532 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1533 return -1; 1534 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1535 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1536 return -1; 1537 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1538 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1539 sawRegImm = 1; 1540 break; 1541 case ENCODING_IW: 1542 if (readImmediate(insn, 2)) 1543 return -1; 1544 break; 1545 case ENCODING_ID: 1546 if (readImmediate(insn, 4)) 1547 return -1; 1548 break; 1549 case ENCODING_IO: 1550 if (readImmediate(insn, 8)) 1551 return -1; 1552 break; 1553 case ENCODING_Iv: 1554 if (readImmediate(insn, insn->immediateSize)) 1555 return -1; 1556 break; 1557 case ENCODING_Ia: 1558 if (readImmediate(insn, insn->addressSize)) 1559 return -1; 1560 break; 1561 case ENCODING_RB: 1562 if (readOpcodeRegister(insn, 1)) 1563 return -1; 1564 break; 1565 case ENCODING_RW: 1566 if (readOpcodeRegister(insn, 2)) 1567 return -1; 1568 break; 1569 case ENCODING_RD: 1570 if (readOpcodeRegister(insn, 4)) 1571 return -1; 1572 break; 1573 case ENCODING_RO: 1574 if (readOpcodeRegister(insn, 8)) 1575 return -1; 1576 break; 1577 case ENCODING_Rv: 1578 if (readOpcodeRegister(insn, 0)) 1579 return -1; 1580 break; 1581 case ENCODING_I: 1582 if (readOpcodeModifier(insn)) 1583 return -1; 1584 break; 1585 case ENCODING_VVVV: 1586 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1587 if (!hasVVVV) 1588 return -1; 1589 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1590 return -1; 1591 break; 1592 case ENCODING_DUP: 1593 break; 1594 default: 1595 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1596 return -1; 1597 } 1598 } 1599 1600 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1601 if (needVVVV) return -1; 1602 1603 return 0; 1604} 1605 1606/* 1607 * decodeInstruction - Reads and interprets a full instruction provided by the 1608 * user. 1609 * 1610 * @param insn - A pointer to the instruction to be populated. Must be 1611 * pre-allocated. 1612 * @param reader - The function to be used to read the instruction's bytes. 1613 * @param readerArg - A generic argument to be passed to the reader to store 1614 * any internal state. 1615 * @param logger - If non-NULL, the function to be used to write log messages 1616 * and warnings. 1617 * @param loggerArg - A generic argument to be passed to the logger to store 1618 * any internal state. 1619 * @param startLoc - The address (in the reader's address space) of the first 1620 * byte in the instruction. 1621 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1622 * decode the instruction in. 1623 * @return - 0 if the instruction's memory could be read; nonzero if 1624 * not. 1625 */ 1626int decodeInstruction(struct InternalInstruction* insn, 1627 byteReader_t reader, 1628 const void* readerArg, 1629 dlog_t logger, 1630 void* loggerArg, 1631 const void* miiArg, 1632 uint64_t startLoc, 1633 DisassemblerMode mode) { 1634 memset(insn, 0, sizeof(struct InternalInstruction)); 1635 1636 insn->reader = reader; 1637 insn->readerArg = readerArg; 1638 insn->dlog = logger; 1639 insn->dlogArg = loggerArg; 1640 insn->startLocation = startLoc; 1641 insn->readerCursor = startLoc; 1642 insn->mode = mode; 1643 insn->numImmediatesConsumed = 0; 1644 1645 if (readPrefixes(insn) || 1646 readOpcode(insn) || 1647 getID(insn, miiArg) || 1648 insn->instructionID == 0 || 1649 readOperands(insn)) 1650 return -1; 1651 1652 insn->operands = &x86OperandSets[insn->spec->operands][0]; 1653 1654 insn->length = insn->readerCursor - insn->startLocation; 1655 1656 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1657 startLoc, insn->readerCursor, insn->length); 1658 1659 if (insn->length > 15) 1660 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1661 1662 return 0; 1663} 1664