1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28#ifndef NDEBUG 29#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 30#else 31#define debug(s) do { } while (0) 32#endif 33 34 35/* 36 * contextForAttrs - Client for the instruction context table. Takes a set of 37 * attributes and returns the appropriate decode context. 38 * 39 * @param attrMask - Attributes, from the enumeration attributeBits. 40 * @return - The InstructionContext to use when looking up an 41 * an instruction with these attributes. 42 */ 43static InstructionContext contextForAttrs(uint8_t attrMask) { 44 return CONTEXTS_SYM[attrMask]; 45} 46 47/* 48 * modRMRequired - Reads the appropriate instruction table to determine whether 49 * the ModR/M byte is required to decode a particular instruction. 50 * 51 * @param type - The opcode type (i.e., how many bytes it has). 52 * @param insnContext - The context for the instruction, as returned by 53 * contextForAttrs. 54 * @param opcode - The last byte of the instruction's opcode, not counting 55 * ModR/M extensions and escapes. 56 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 57 */ 58static int modRMRequired(OpcodeType type, 59 InstructionContext insnContext, 60 uint8_t opcode) { 61 const struct ContextDecision* decision = 0; 62 63 switch (type) { 64 case ONEBYTE: 65 decision = &ONEBYTE_SYM; 66 break; 67 case TWOBYTE: 68 decision = &TWOBYTE_SYM; 69 break; 70 case THREEBYTE_38: 71 decision = &THREEBYTE38_SYM; 72 break; 73 case THREEBYTE_3A: 74 decision = &THREEBYTE3A_SYM; 75 break; 76 case THREEBYTE_A6: 77 decision = &THREEBYTEA6_SYM; 78 break; 79 case THREEBYTE_A7: 80 decision = &THREEBYTEA7_SYM; 81 break; 82 case XOP8_MAP: 83 decision = &XOP8_MAP_SYM; 84 break; 85 case XOP9_MAP: 86 decision = &XOP9_MAP_SYM; 87 break; 88 case XOPA_MAP: 89 decision = &XOPA_MAP_SYM; 90 break; 91 } 92 93 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 94 modrm_type != MODRM_ONEENTRY; 95} 96 97/* 98 * decode - Reads the appropriate instruction table to obtain the unique ID of 99 * an instruction. 100 * 101 * @param type - See modRMRequired(). 102 * @param insnContext - See modRMRequired(). 103 * @param opcode - See modRMRequired(). 104 * @param modRM - The ModR/M byte if required, or any value if not. 105 * @return - The UID of the instruction, or 0 on failure. 106 */ 107static InstrUID decode(OpcodeType type, 108 InstructionContext insnContext, 109 uint8_t opcode, 110 uint8_t modRM) { 111 const struct ModRMDecision* dec = 0; 112 113 switch (type) { 114 case ONEBYTE: 115 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 116 break; 117 case TWOBYTE: 118 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 119 break; 120 case THREEBYTE_38: 121 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 122 break; 123 case THREEBYTE_3A: 124 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 125 break; 126 case THREEBYTE_A6: 127 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 128 break; 129 case THREEBYTE_A7: 130 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 131 break; 132 case XOP8_MAP: 133 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 134 break; 135 case XOP9_MAP: 136 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 137 break; 138 case XOPA_MAP: 139 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 140 break; 141 } 142 143 switch (dec->modrm_type) { 144 default: 145 debug("Corrupt table! Unknown modrm_type"); 146 return 0; 147 case MODRM_ONEENTRY: 148 return modRMTable[dec->instructionIDs]; 149 case MODRM_SPLITRM: 150 if (modFromModRM(modRM) == 0x3) 151 return modRMTable[dec->instructionIDs+1]; 152 return modRMTable[dec->instructionIDs]; 153 case MODRM_SPLITREG: 154 if (modFromModRM(modRM) == 0x3) 155 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 156 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 157 case MODRM_SPLITMISC: 158 if (modFromModRM(modRM) == 0x3) 159 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 160 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 161 case MODRM_FULL: 162 return modRMTable[dec->instructionIDs+modRM]; 163 } 164} 165 166/* 167 * specifierForUID - Given a UID, returns the name and operand specification for 168 * that instruction. 169 * 170 * @param uid - The unique ID for the instruction. This should be returned by 171 * decode(); specifierForUID will not check bounds. 172 * @return - A pointer to the specification for that instruction. 173 */ 174static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 175 return &INSTRUCTIONS_SYM[uid]; 176} 177 178/* 179 * consumeByte - Uses the reader function provided by the user to consume one 180 * byte from the instruction's memory and advance the cursor. 181 * 182 * @param insn - The instruction with the reader function to use. The cursor 183 * for this instruction is advanced. 184 * @param byte - A pointer to a pre-allocated memory buffer to be populated 185 * with the data read. 186 * @return - 0 if the read was successful; nonzero otherwise. 187 */ 188static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 189 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 190 191 if (!ret) 192 ++(insn->readerCursor); 193 194 return ret; 195} 196 197/* 198 * lookAtByte - Like consumeByte, but does not advance the cursor. 199 * 200 * @param insn - See consumeByte(). 201 * @param byte - See consumeByte(). 202 * @return - See consumeByte(). 203 */ 204static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 205 return insn->reader(insn->readerArg, byte, insn->readerCursor); 206} 207 208static void unconsumeByte(struct InternalInstruction* insn) { 209 insn->readerCursor--; 210} 211 212#define CONSUME_FUNC(name, type) \ 213 static int name(struct InternalInstruction* insn, type* ptr) { \ 214 type combined = 0; \ 215 unsigned offset; \ 216 for (offset = 0; offset < sizeof(type); ++offset) { \ 217 uint8_t byte; \ 218 int ret = insn->reader(insn->readerArg, \ 219 &byte, \ 220 insn->readerCursor + offset); \ 221 if (ret) \ 222 return ret; \ 223 combined = combined | ((uint64_t)byte << (offset * 8)); \ 224 } \ 225 *ptr = combined; \ 226 insn->readerCursor += sizeof(type); \ 227 return 0; \ 228 } 229 230/* 231 * consume* - Use the reader function provided by the user to consume data 232 * values of various sizes from the instruction's memory and advance the 233 * cursor appropriately. These readers perform endian conversion. 234 * 235 * @param insn - See consumeByte(). 236 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 237 * be populated with the data read. 238 * @return - See consumeByte(). 239 */ 240CONSUME_FUNC(consumeInt8, int8_t) 241CONSUME_FUNC(consumeInt16, int16_t) 242CONSUME_FUNC(consumeInt32, int32_t) 243CONSUME_FUNC(consumeUInt16, uint16_t) 244CONSUME_FUNC(consumeUInt32, uint32_t) 245CONSUME_FUNC(consumeUInt64, uint64_t) 246 247/* 248 * dbgprintf - Uses the logging function provided by the user to log a single 249 * message, typically without a carriage-return. 250 * 251 * @param insn - The instruction containing the logging function. 252 * @param format - See printf(). 253 * @param ... - See printf(). 254 */ 255static void dbgprintf(struct InternalInstruction* insn, 256 const char* format, 257 ...) { 258 char buffer[256]; 259 va_list ap; 260 261 if (!insn->dlog) 262 return; 263 264 va_start(ap, format); 265 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 266 va_end(ap); 267 268 insn->dlog(insn->dlogArg, buffer); 269 270 return; 271} 272 273/* 274 * setPrefixPresent - Marks that a particular prefix is present at a particular 275 * location. 276 * 277 * @param insn - The instruction to be marked as having the prefix. 278 * @param prefix - The prefix that is present. 279 * @param location - The location where the prefix is located (in the address 280 * space of the instruction's reader). 281 */ 282static void setPrefixPresent(struct InternalInstruction* insn, 283 uint8_t prefix, 284 uint64_t location) 285{ 286 insn->prefixPresent[prefix] = 1; 287 insn->prefixLocations[prefix] = location; 288} 289 290/* 291 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 292 * present at a given location. 293 * 294 * @param insn - The instruction to be queried. 295 * @param prefix - The prefix. 296 * @param location - The location to query. 297 * @return - Whether the prefix is at that location. 298 */ 299static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 300 uint8_t prefix, 301 uint64_t location) 302{ 303 if (insn->prefixPresent[prefix] == 1 && 304 insn->prefixLocations[prefix] == location) 305 return TRUE; 306 else 307 return FALSE; 308} 309 310/* 311 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 312 * instruction as having them. Also sets the instruction's default operand, 313 * address, and other relevant data sizes to report operands correctly. 314 * 315 * @param insn - The instruction whose prefixes are to be read. 316 * @return - 0 if the instruction could be read until the end of the prefix 317 * bytes, and no prefixes conflicted; nonzero otherwise. 318 */ 319static int readPrefixes(struct InternalInstruction* insn) { 320 BOOL isPrefix = TRUE; 321 BOOL prefixGroups[4] = { FALSE }; 322 uint64_t prefixLocation; 323 uint8_t byte = 0; 324 uint8_t nextByte; 325 326 BOOL hasAdSize = FALSE; 327 BOOL hasOpSize = FALSE; 328 329 dbgprintf(insn, "readPrefixes()"); 330 331 while (isPrefix) { 332 prefixLocation = insn->readerCursor; 333 334 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 335 if (consumeByte(insn, &byte)) 336 break; 337 338 /* 339 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 340 * break and let it be disassembled as a normal "instruction". 341 */ 342 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 343 break; 344 345 if (insn->readerCursor - 1 == insn->startLocation 346 && (byte == 0xf2 || byte == 0xf3) 347 && !lookAtByte(insn, &nextByte)) 348 { 349 /* 350 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 351 * met: 352 * - it is followed by a LOCK (0xf0) prefix 353 * - it is followed by an xchg instruction 354 * then it should be disassembled as a xacquire/xrelease not repne/rep. 355 */ 356 if ((byte == 0xf2 || byte == 0xf3) && 357 ((nextByte == 0xf0) | 358 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 359 insn->xAcquireRelease = TRUE; 360 /* 361 * Also if the byte is 0xf3, and the following condition is met: 362 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 363 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 364 * then it should be disassembled as an xrelease not rep. 365 */ 366 if (byte == 0xf3 && 367 (nextByte == 0x88 || nextByte == 0x89 || 368 nextByte == 0xc6 || nextByte == 0xc7)) 369 insn->xAcquireRelease = TRUE; 370 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 371 if (consumeByte(insn, &nextByte)) 372 return -1; 373 if (lookAtByte(insn, &nextByte)) 374 return -1; 375 unconsumeByte(insn); 376 } 377 if (nextByte != 0x0f && nextByte != 0x90) 378 break; 379 } 380 381 switch (byte) { 382 case 0xf0: /* LOCK */ 383 case 0xf2: /* REPNE/REPNZ */ 384 case 0xf3: /* REP or REPE/REPZ */ 385 if (prefixGroups[0]) 386 dbgprintf(insn, "Redundant Group 1 prefix"); 387 prefixGroups[0] = TRUE; 388 setPrefixPresent(insn, byte, prefixLocation); 389 break; 390 case 0x2e: /* CS segment override -OR- Branch not taken */ 391 case 0x36: /* SS segment override -OR- Branch taken */ 392 case 0x3e: /* DS segment override */ 393 case 0x26: /* ES segment override */ 394 case 0x64: /* FS segment override */ 395 case 0x65: /* GS segment override */ 396 switch (byte) { 397 case 0x2e: 398 insn->segmentOverride = SEG_OVERRIDE_CS; 399 break; 400 case 0x36: 401 insn->segmentOverride = SEG_OVERRIDE_SS; 402 break; 403 case 0x3e: 404 insn->segmentOverride = SEG_OVERRIDE_DS; 405 break; 406 case 0x26: 407 insn->segmentOverride = SEG_OVERRIDE_ES; 408 break; 409 case 0x64: 410 insn->segmentOverride = SEG_OVERRIDE_FS; 411 break; 412 case 0x65: 413 insn->segmentOverride = SEG_OVERRIDE_GS; 414 break; 415 default: 416 debug("Unhandled override"); 417 return -1; 418 } 419 if (prefixGroups[1]) 420 dbgprintf(insn, "Redundant Group 2 prefix"); 421 prefixGroups[1] = TRUE; 422 setPrefixPresent(insn, byte, prefixLocation); 423 break; 424 case 0x66: /* Operand-size override */ 425 if (prefixGroups[2]) 426 dbgprintf(insn, "Redundant Group 3 prefix"); 427 prefixGroups[2] = TRUE; 428 hasOpSize = TRUE; 429 setPrefixPresent(insn, byte, prefixLocation); 430 break; 431 case 0x67: /* Address-size override */ 432 if (prefixGroups[3]) 433 dbgprintf(insn, "Redundant Group 4 prefix"); 434 prefixGroups[3] = TRUE; 435 hasAdSize = TRUE; 436 setPrefixPresent(insn, byte, prefixLocation); 437 break; 438 default: /* Not a prefix byte */ 439 isPrefix = FALSE; 440 break; 441 } 442 443 if (isPrefix) 444 dbgprintf(insn, "Found prefix 0x%hhx", byte); 445 } 446 447 insn->vexXopType = TYPE_NO_VEX_XOP; 448 449 if (byte == 0xc4) { 450 uint8_t byte1; 451 452 if (lookAtByte(insn, &byte1)) { 453 dbgprintf(insn, "Couldn't read second byte of VEX"); 454 return -1; 455 } 456 457 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 458 insn->vexXopType = TYPE_VEX_3B; 459 insn->necessaryPrefixLocation = insn->readerCursor - 1; 460 } 461 else { 462 unconsumeByte(insn); 463 insn->necessaryPrefixLocation = insn->readerCursor - 1; 464 } 465 466 if (insn->vexXopType == TYPE_VEX_3B) { 467 insn->vexXopPrefix[0] = byte; 468 consumeByte(insn, &insn->vexXopPrefix[1]); 469 consumeByte(insn, &insn->vexXopPrefix[2]); 470 471 /* We simulate the REX prefix for simplicity's sake */ 472 473 if (insn->mode == MODE_64BIT) { 474 insn->rexPrefix = 0x40 475 | (wFromVEX3of3(insn->vexXopPrefix[2]) << 3) 476 | (rFromVEX2of3(insn->vexXopPrefix[1]) << 2) 477 | (xFromVEX2of3(insn->vexXopPrefix[1]) << 1) 478 | (bFromVEX2of3(insn->vexXopPrefix[1]) << 0); 479 } 480 481 switch (ppFromVEX3of3(insn->vexXopPrefix[2])) 482 { 483 default: 484 break; 485 case VEX_PREFIX_66: 486 hasOpSize = TRUE; 487 break; 488 } 489 490 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 491 insn->vexXopPrefix[0], insn->vexXopPrefix[1], 492 insn->vexXopPrefix[2]); 493 } 494 } 495 else if (byte == 0xc5) { 496 uint8_t byte1; 497 498 if (lookAtByte(insn, &byte1)) { 499 dbgprintf(insn, "Couldn't read second byte of VEX"); 500 return -1; 501 } 502 503 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 504 insn->vexXopType = TYPE_VEX_2B; 505 } 506 else { 507 unconsumeByte(insn); 508 } 509 510 if (insn->vexXopType == TYPE_VEX_2B) { 511 insn->vexXopPrefix[0] = byte; 512 consumeByte(insn, &insn->vexXopPrefix[1]); 513 514 if (insn->mode == MODE_64BIT) { 515 insn->rexPrefix = 0x40 516 | (rFromVEX2of2(insn->vexXopPrefix[1]) << 2); 517 } 518 519 switch (ppFromVEX2of2(insn->vexXopPrefix[1])) 520 { 521 default: 522 break; 523 case VEX_PREFIX_66: 524 hasOpSize = TRUE; 525 break; 526 } 527 528 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]); 529 } 530 } 531 else if (byte == 0x8f) { 532 uint8_t byte1; 533 534 if (lookAtByte(insn, &byte1)) { 535 dbgprintf(insn, "Couldn't read second byte of XOP"); 536 return -1; 537 } 538 539 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ 540 insn->vexXopType = TYPE_XOP; 541 insn->necessaryPrefixLocation = insn->readerCursor - 1; 542 } 543 else { 544 unconsumeByte(insn); 545 insn->necessaryPrefixLocation = insn->readerCursor - 1; 546 } 547 548 if (insn->vexXopType == TYPE_XOP) { 549 insn->vexXopPrefix[0] = byte; 550 consumeByte(insn, &insn->vexXopPrefix[1]); 551 consumeByte(insn, &insn->vexXopPrefix[2]); 552 553 /* We simulate the REX prefix for simplicity's sake */ 554 555 if (insn->mode == MODE_64BIT) { 556 insn->rexPrefix = 0x40 557 | (wFromXOP3of3(insn->vexXopPrefix[2]) << 3) 558 | (rFromXOP2of3(insn->vexXopPrefix[1]) << 2) 559 | (xFromXOP2of3(insn->vexXopPrefix[1]) << 1) 560 | (bFromXOP2of3(insn->vexXopPrefix[1]) << 0); 561 } 562 563 switch (ppFromXOP3of3(insn->vexXopPrefix[2])) 564 { 565 default: 566 break; 567 case VEX_PREFIX_66: 568 hasOpSize = TRUE; 569 break; 570 } 571 572 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 573 insn->vexXopPrefix[0], insn->vexXopPrefix[1], 574 insn->vexXopPrefix[2]); 575 } 576 } 577 else { 578 if (insn->mode == MODE_64BIT) { 579 if ((byte & 0xf0) == 0x40) { 580 uint8_t opcodeByte; 581 582 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 583 dbgprintf(insn, "Redundant REX prefix"); 584 return -1; 585 } 586 587 insn->rexPrefix = byte; 588 insn->necessaryPrefixLocation = insn->readerCursor - 2; 589 590 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 591 } else { 592 unconsumeByte(insn); 593 insn->necessaryPrefixLocation = insn->readerCursor - 1; 594 } 595 } else { 596 unconsumeByte(insn); 597 insn->necessaryPrefixLocation = insn->readerCursor - 1; 598 } 599 } 600 601 if (insn->mode == MODE_16BIT) { 602 insn->registerSize = (hasOpSize ? 4 : 2); 603 insn->addressSize = (hasAdSize ? 4 : 2); 604 insn->displacementSize = (hasAdSize ? 4 : 2); 605 insn->immediateSize = (hasOpSize ? 4 : 2); 606 } else if (insn->mode == MODE_32BIT) { 607 insn->registerSize = (hasOpSize ? 2 : 4); 608 insn->addressSize = (hasAdSize ? 2 : 4); 609 insn->displacementSize = (hasAdSize ? 2 : 4); 610 insn->immediateSize = (hasOpSize ? 2 : 4); 611 } else if (insn->mode == MODE_64BIT) { 612 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 613 insn->registerSize = 8; 614 insn->addressSize = (hasAdSize ? 4 : 8); 615 insn->displacementSize = 4; 616 insn->immediateSize = 4; 617 } else if (insn->rexPrefix) { 618 insn->registerSize = (hasOpSize ? 2 : 4); 619 insn->addressSize = (hasAdSize ? 4 : 8); 620 insn->displacementSize = (hasOpSize ? 2 : 4); 621 insn->immediateSize = (hasOpSize ? 2 : 4); 622 } else { 623 insn->registerSize = (hasOpSize ? 2 : 4); 624 insn->addressSize = (hasAdSize ? 4 : 8); 625 insn->displacementSize = (hasOpSize ? 2 : 4); 626 insn->immediateSize = (hasOpSize ? 2 : 4); 627 } 628 } 629 630 return 0; 631} 632 633/* 634 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 635 * extended or escape opcodes). 636 * 637 * @param insn - The instruction whose opcode is to be read. 638 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 639 */ 640static int readOpcode(struct InternalInstruction* insn) { 641 /* Determine the length of the primary opcode */ 642 643 uint8_t current; 644 645 dbgprintf(insn, "readOpcode()"); 646 647 insn->opcodeType = ONEBYTE; 648 649 if (insn->vexXopType == TYPE_VEX_3B) 650 { 651 switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1])) 652 { 653 default: 654 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 655 mmmmmFromVEX2of3(insn->vexXopPrefix[1])); 656 return -1; 657 case VEX_LOB_0F: 658 insn->opcodeType = TWOBYTE; 659 return consumeByte(insn, &insn->opcode); 660 case VEX_LOB_0F38: 661 insn->opcodeType = THREEBYTE_38; 662 return consumeByte(insn, &insn->opcode); 663 case VEX_LOB_0F3A: 664 insn->opcodeType = THREEBYTE_3A; 665 return consumeByte(insn, &insn->opcode); 666 } 667 } 668 else if (insn->vexXopType == TYPE_VEX_2B) 669 { 670 insn->opcodeType = TWOBYTE; 671 return consumeByte(insn, &insn->opcode); 672 } 673 else if (insn->vexXopType == TYPE_XOP) 674 { 675 switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1])) 676 { 677 default: 678 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 679 mmmmmFromVEX2of3(insn->vexXopPrefix[1])); 680 return -1; 681 case XOP_MAP_SELECT_8: 682 insn->opcodeType = XOP8_MAP; 683 return consumeByte(insn, &insn->opcode); 684 case XOP_MAP_SELECT_9: 685 insn->opcodeType = XOP9_MAP; 686 return consumeByte(insn, &insn->opcode); 687 case XOP_MAP_SELECT_A: 688 insn->opcodeType = XOPA_MAP; 689 return consumeByte(insn, &insn->opcode); 690 } 691 } 692 693 if (consumeByte(insn, ¤t)) 694 return -1; 695 696 if (current == 0x0f) { 697 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 698 699 if (consumeByte(insn, ¤t)) 700 return -1; 701 702 if (current == 0x38) { 703 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 704 705 if (consumeByte(insn, ¤t)) 706 return -1; 707 708 insn->opcodeType = THREEBYTE_38; 709 } else if (current == 0x3a) { 710 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 711 712 if (consumeByte(insn, ¤t)) 713 return -1; 714 715 insn->opcodeType = THREEBYTE_3A; 716 } else if (current == 0xa6) { 717 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 718 719 if (consumeByte(insn, ¤t)) 720 return -1; 721 722 insn->opcodeType = THREEBYTE_A6; 723 } else if (current == 0xa7) { 724 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 725 726 if (consumeByte(insn, ¤t)) 727 return -1; 728 729 insn->opcodeType = THREEBYTE_A7; 730 } else { 731 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 732 733 insn->opcodeType = TWOBYTE; 734 } 735 } 736 737 /* 738 * At this point we have consumed the full opcode. 739 * Anything we consume from here on must be unconsumed. 740 */ 741 742 insn->opcode = current; 743 744 return 0; 745} 746 747static int readModRM(struct InternalInstruction* insn); 748 749/* 750 * getIDWithAttrMask - Determines the ID of an instruction, consuming 751 * the ModR/M byte as appropriate for extended and escape opcodes, 752 * and using a supplied attribute mask. 753 * 754 * @param instructionID - A pointer whose target is filled in with the ID of the 755 * instruction. 756 * @param insn - The instruction whose ID is to be determined. 757 * @param attrMask - The attribute mask to search. 758 * @return - 0 if the ModR/M could be read when needed or was not 759 * needed; nonzero otherwise. 760 */ 761static int getIDWithAttrMask(uint16_t* instructionID, 762 struct InternalInstruction* insn, 763 uint8_t attrMask) { 764 BOOL hasModRMExtension; 765 766 uint8_t instructionClass; 767 768 instructionClass = contextForAttrs(attrMask); 769 770 hasModRMExtension = modRMRequired(insn->opcodeType, 771 instructionClass, 772 insn->opcode); 773 774 if (hasModRMExtension) { 775 if (readModRM(insn)) 776 return -1; 777 778 *instructionID = decode(insn->opcodeType, 779 instructionClass, 780 insn->opcode, 781 insn->modRM); 782 } else { 783 *instructionID = decode(insn->opcodeType, 784 instructionClass, 785 insn->opcode, 786 0); 787 } 788 789 return 0; 790} 791 792/* 793 * is16BitEquivalent - Determines whether two instruction names refer to 794 * equivalent instructions but one is 16-bit whereas the other is not. 795 * 796 * @param orig - The instruction that is not 16-bit 797 * @param equiv - The instruction that is 16-bit 798 */ 799static BOOL is16BitEquivalent(const char* orig, const char* equiv) { 800 off_t i; 801 802 for (i = 0;; i++) { 803 if (orig[i] == '\0' && equiv[i] == '\0') 804 return TRUE; 805 if (orig[i] == '\0' || equiv[i] == '\0') 806 return FALSE; 807 if (orig[i] != equiv[i]) { 808 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 809 continue; 810 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 811 continue; 812 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 813 continue; 814 return FALSE; 815 } 816 } 817} 818 819/* 820 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 821 * appropriate for extended and escape opcodes. Determines the attributes and 822 * context for the instruction before doing so. 823 * 824 * @param insn - The instruction whose ID is to be determined. 825 * @return - 0 if the ModR/M could be read when needed or was not needed; 826 * nonzero otherwise. 827 */ 828static int getID(struct InternalInstruction* insn, const void *miiArg) { 829 uint8_t attrMask; 830 uint16_t instructionID; 831 832 dbgprintf(insn, "getID()"); 833 834 attrMask = ATTR_NONE; 835 836 if (insn->mode == MODE_64BIT) 837 attrMask |= ATTR_64BIT; 838 839 if (insn->vexXopType != TYPE_NO_VEX_XOP) { 840 attrMask |= ATTR_VEX; 841 842 if (insn->vexXopType == TYPE_VEX_3B) { 843 switch (ppFromVEX3of3(insn->vexXopPrefix[2])) { 844 case VEX_PREFIX_66: 845 attrMask |= ATTR_OPSIZE; 846 break; 847 case VEX_PREFIX_F3: 848 attrMask |= ATTR_XS; 849 break; 850 case VEX_PREFIX_F2: 851 attrMask |= ATTR_XD; 852 break; 853 } 854 855 if (lFromVEX3of3(insn->vexXopPrefix[2])) 856 attrMask |= ATTR_VEXL; 857 } 858 else if (insn->vexXopType == TYPE_VEX_2B) { 859 switch (ppFromVEX2of2(insn->vexXopPrefix[1])) { 860 case VEX_PREFIX_66: 861 attrMask |= ATTR_OPSIZE; 862 break; 863 case VEX_PREFIX_F3: 864 attrMask |= ATTR_XS; 865 break; 866 case VEX_PREFIX_F2: 867 attrMask |= ATTR_XD; 868 break; 869 } 870 871 if (lFromVEX2of2(insn->vexXopPrefix[1])) 872 attrMask |= ATTR_VEXL; 873 } 874 else if (insn->vexXopType == TYPE_XOP) { 875 switch (ppFromXOP3of3(insn->vexXopPrefix[2])) { 876 case VEX_PREFIX_66: 877 attrMask |= ATTR_OPSIZE; 878 break; 879 case VEX_PREFIX_F3: 880 attrMask |= ATTR_XS; 881 break; 882 case VEX_PREFIX_F2: 883 attrMask |= ATTR_XD; 884 break; 885 } 886 887 if (lFromXOP3of3(insn->vexXopPrefix[2])) 888 attrMask |= ATTR_VEXL; 889 } 890 else { 891 return -1; 892 } 893 } 894 else { 895 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 896 attrMask |= ATTR_OPSIZE; 897 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 898 attrMask |= ATTR_ADSIZE; 899 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 900 attrMask |= ATTR_XS; 901 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 902 attrMask |= ATTR_XD; 903 } 904 905 if (insn->rexPrefix & 0x08) 906 attrMask |= ATTR_REXW; 907 908 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 909 return -1; 910 911 /* The following clauses compensate for limitations of the tables. */ 912 913 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 914 /* 915 * The instruction tables make no distinction between instructions that 916 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 917 * particular spot (i.e., many MMX operations). In general we're 918 * conservative, but in the specific case where OpSize is present but not 919 * in the right place we check if there's a 16-bit operation. 920 */ 921 922 const struct InstructionSpecifier *spec; 923 uint16_t instructionIDWithOpsize; 924 const char *specName, *specWithOpSizeName; 925 926 spec = specifierForUID(instructionID); 927 928 if (getIDWithAttrMask(&instructionIDWithOpsize, 929 insn, 930 attrMask | ATTR_OPSIZE)) { 931 /* 932 * ModRM required with OpSize but not present; give up and return version 933 * without OpSize set 934 */ 935 936 insn->instructionID = instructionID; 937 insn->spec = spec; 938 return 0; 939 } 940 941 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 942 specWithOpSizeName = 943 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 944 945 if (is16BitEquivalent(specName, specWithOpSizeName)) { 946 insn->instructionID = instructionIDWithOpsize; 947 insn->spec = specifierForUID(instructionIDWithOpsize); 948 } else { 949 insn->instructionID = instructionID; 950 insn->spec = spec; 951 } 952 return 0; 953 } 954 955 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 956 insn->rexPrefix & 0x01) { 957 /* 958 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 959 * it should decode as XCHG %r8, %eax. 960 */ 961 962 const struct InstructionSpecifier *spec; 963 uint16_t instructionIDWithNewOpcode; 964 const struct InstructionSpecifier *specWithNewOpcode; 965 966 spec = specifierForUID(instructionID); 967 968 /* Borrow opcode from one of the other XCHGar opcodes */ 969 insn->opcode = 0x91; 970 971 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 972 insn, 973 attrMask)) { 974 insn->opcode = 0x90; 975 976 insn->instructionID = instructionID; 977 insn->spec = spec; 978 return 0; 979 } 980 981 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 982 983 /* Change back */ 984 insn->opcode = 0x90; 985 986 insn->instructionID = instructionIDWithNewOpcode; 987 insn->spec = specWithNewOpcode; 988 989 return 0; 990 } 991 992 insn->instructionID = instructionID; 993 insn->spec = specifierForUID(insn->instructionID); 994 995 return 0; 996} 997 998/* 999 * readSIB - Consumes the SIB byte to determine addressing information for an 1000 * instruction. 1001 * 1002 * @param insn - The instruction whose SIB byte is to be read. 1003 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 1004 */ 1005static int readSIB(struct InternalInstruction* insn) { 1006 SIBIndex sibIndexBase = 0; 1007 SIBBase sibBaseBase = 0; 1008 uint8_t index, base; 1009 1010 dbgprintf(insn, "readSIB()"); 1011 1012 if (insn->consumedSIB) 1013 return 0; 1014 1015 insn->consumedSIB = TRUE; 1016 1017 switch (insn->addressSize) { 1018 case 2: 1019 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 1020 return -1; 1021 break; 1022 case 4: 1023 sibIndexBase = SIB_INDEX_EAX; 1024 sibBaseBase = SIB_BASE_EAX; 1025 break; 1026 case 8: 1027 sibIndexBase = SIB_INDEX_RAX; 1028 sibBaseBase = SIB_BASE_RAX; 1029 break; 1030 } 1031 1032 if (consumeByte(insn, &insn->sib)) 1033 return -1; 1034 1035 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1036 1037 switch (index) { 1038 case 0x4: 1039 insn->sibIndex = SIB_INDEX_NONE; 1040 break; 1041 default: 1042 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 1043 if (insn->sibIndex == SIB_INDEX_sib || 1044 insn->sibIndex == SIB_INDEX_sib64) 1045 insn->sibIndex = SIB_INDEX_NONE; 1046 break; 1047 } 1048 1049 switch (scaleFromSIB(insn->sib)) { 1050 case 0: 1051 insn->sibScale = 1; 1052 break; 1053 case 1: 1054 insn->sibScale = 2; 1055 break; 1056 case 2: 1057 insn->sibScale = 4; 1058 break; 1059 case 3: 1060 insn->sibScale = 8; 1061 break; 1062 } 1063 1064 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1065 1066 switch (base) { 1067 case 0x5:
| 1/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is distributed under the University of Illinois Open Source 6 * License. See LICENSE.TXT for details. 7 * 8 *===----------------------------------------------------------------------===* 9 * 10 * This file is part of the X86 Disassembler. 11 * It contains the implementation of the instruction decoder. 12 * Documentation for the disassembler can be found in X86Disassembler.h. 13 * 14 *===----------------------------------------------------------------------===*/ 15 16#include <stdarg.h> /* for va_*() */ 17#include <stdio.h> /* for vsnprintf() */ 18#include <stdlib.h> /* for exit() */ 19#include <string.h> /* for memset() */ 20 21#include "X86DisassemblerDecoder.h" 22 23#include "X86GenDisassemblerTables.inc" 24 25#define TRUE 1 26#define FALSE 0 27 28#ifndef NDEBUG 29#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) 30#else 31#define debug(s) do { } while (0) 32#endif 33 34 35/* 36 * contextForAttrs - Client for the instruction context table. Takes a set of 37 * attributes and returns the appropriate decode context. 38 * 39 * @param attrMask - Attributes, from the enumeration attributeBits. 40 * @return - The InstructionContext to use when looking up an 41 * an instruction with these attributes. 42 */ 43static InstructionContext contextForAttrs(uint8_t attrMask) { 44 return CONTEXTS_SYM[attrMask]; 45} 46 47/* 48 * modRMRequired - Reads the appropriate instruction table to determine whether 49 * the ModR/M byte is required to decode a particular instruction. 50 * 51 * @param type - The opcode type (i.e., how many bytes it has). 52 * @param insnContext - The context for the instruction, as returned by 53 * contextForAttrs. 54 * @param opcode - The last byte of the instruction's opcode, not counting 55 * ModR/M extensions and escapes. 56 * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 57 */ 58static int modRMRequired(OpcodeType type, 59 InstructionContext insnContext, 60 uint8_t opcode) { 61 const struct ContextDecision* decision = 0; 62 63 switch (type) { 64 case ONEBYTE: 65 decision = &ONEBYTE_SYM; 66 break; 67 case TWOBYTE: 68 decision = &TWOBYTE_SYM; 69 break; 70 case THREEBYTE_38: 71 decision = &THREEBYTE38_SYM; 72 break; 73 case THREEBYTE_3A: 74 decision = &THREEBYTE3A_SYM; 75 break; 76 case THREEBYTE_A6: 77 decision = &THREEBYTEA6_SYM; 78 break; 79 case THREEBYTE_A7: 80 decision = &THREEBYTEA7_SYM; 81 break; 82 case XOP8_MAP: 83 decision = &XOP8_MAP_SYM; 84 break; 85 case XOP9_MAP: 86 decision = &XOP9_MAP_SYM; 87 break; 88 case XOPA_MAP: 89 decision = &XOPA_MAP_SYM; 90 break; 91 } 92 93 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 94 modrm_type != MODRM_ONEENTRY; 95} 96 97/* 98 * decode - Reads the appropriate instruction table to obtain the unique ID of 99 * an instruction. 100 * 101 * @param type - See modRMRequired(). 102 * @param insnContext - See modRMRequired(). 103 * @param opcode - See modRMRequired(). 104 * @param modRM - The ModR/M byte if required, or any value if not. 105 * @return - The UID of the instruction, or 0 on failure. 106 */ 107static InstrUID decode(OpcodeType type, 108 InstructionContext insnContext, 109 uint8_t opcode, 110 uint8_t modRM) { 111 const struct ModRMDecision* dec = 0; 112 113 switch (type) { 114 case ONEBYTE: 115 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 116 break; 117 case TWOBYTE: 118 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 119 break; 120 case THREEBYTE_38: 121 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 122 break; 123 case THREEBYTE_3A: 124 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 125 break; 126 case THREEBYTE_A6: 127 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 128 break; 129 case THREEBYTE_A7: 130 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 131 break; 132 case XOP8_MAP: 133 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 134 break; 135 case XOP9_MAP: 136 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 137 break; 138 case XOPA_MAP: 139 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 140 break; 141 } 142 143 switch (dec->modrm_type) { 144 default: 145 debug("Corrupt table! Unknown modrm_type"); 146 return 0; 147 case MODRM_ONEENTRY: 148 return modRMTable[dec->instructionIDs]; 149 case MODRM_SPLITRM: 150 if (modFromModRM(modRM) == 0x3) 151 return modRMTable[dec->instructionIDs+1]; 152 return modRMTable[dec->instructionIDs]; 153 case MODRM_SPLITREG: 154 if (modFromModRM(modRM) == 0x3) 155 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; 156 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 157 case MODRM_SPLITMISC: 158 if (modFromModRM(modRM) == 0x3) 159 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; 160 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; 161 case MODRM_FULL: 162 return modRMTable[dec->instructionIDs+modRM]; 163 } 164} 165 166/* 167 * specifierForUID - Given a UID, returns the name and operand specification for 168 * that instruction. 169 * 170 * @param uid - The unique ID for the instruction. This should be returned by 171 * decode(); specifierForUID will not check bounds. 172 * @return - A pointer to the specification for that instruction. 173 */ 174static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { 175 return &INSTRUCTIONS_SYM[uid]; 176} 177 178/* 179 * consumeByte - Uses the reader function provided by the user to consume one 180 * byte from the instruction's memory and advance the cursor. 181 * 182 * @param insn - The instruction with the reader function to use. The cursor 183 * for this instruction is advanced. 184 * @param byte - A pointer to a pre-allocated memory buffer to be populated 185 * with the data read. 186 * @return - 0 if the read was successful; nonzero otherwise. 187 */ 188static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { 189 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); 190 191 if (!ret) 192 ++(insn->readerCursor); 193 194 return ret; 195} 196 197/* 198 * lookAtByte - Like consumeByte, but does not advance the cursor. 199 * 200 * @param insn - See consumeByte(). 201 * @param byte - See consumeByte(). 202 * @return - See consumeByte(). 203 */ 204static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { 205 return insn->reader(insn->readerArg, byte, insn->readerCursor); 206} 207 208static void unconsumeByte(struct InternalInstruction* insn) { 209 insn->readerCursor--; 210} 211 212#define CONSUME_FUNC(name, type) \ 213 static int name(struct InternalInstruction* insn, type* ptr) { \ 214 type combined = 0; \ 215 unsigned offset; \ 216 for (offset = 0; offset < sizeof(type); ++offset) { \ 217 uint8_t byte; \ 218 int ret = insn->reader(insn->readerArg, \ 219 &byte, \ 220 insn->readerCursor + offset); \ 221 if (ret) \ 222 return ret; \ 223 combined = combined | ((uint64_t)byte << (offset * 8)); \ 224 } \ 225 *ptr = combined; \ 226 insn->readerCursor += sizeof(type); \ 227 return 0; \ 228 } 229 230/* 231 * consume* - Use the reader function provided by the user to consume data 232 * values of various sizes from the instruction's memory and advance the 233 * cursor appropriately. These readers perform endian conversion. 234 * 235 * @param insn - See consumeByte(). 236 * @param ptr - A pointer to a pre-allocated memory of appropriate size to 237 * be populated with the data read. 238 * @return - See consumeByte(). 239 */ 240CONSUME_FUNC(consumeInt8, int8_t) 241CONSUME_FUNC(consumeInt16, int16_t) 242CONSUME_FUNC(consumeInt32, int32_t) 243CONSUME_FUNC(consumeUInt16, uint16_t) 244CONSUME_FUNC(consumeUInt32, uint32_t) 245CONSUME_FUNC(consumeUInt64, uint64_t) 246 247/* 248 * dbgprintf - Uses the logging function provided by the user to log a single 249 * message, typically without a carriage-return. 250 * 251 * @param insn - The instruction containing the logging function. 252 * @param format - See printf(). 253 * @param ... - See printf(). 254 */ 255static void dbgprintf(struct InternalInstruction* insn, 256 const char* format, 257 ...) { 258 char buffer[256]; 259 va_list ap; 260 261 if (!insn->dlog) 262 return; 263 264 va_start(ap, format); 265 (void)vsnprintf(buffer, sizeof(buffer), format, ap); 266 va_end(ap); 267 268 insn->dlog(insn->dlogArg, buffer); 269 270 return; 271} 272 273/* 274 * setPrefixPresent - Marks that a particular prefix is present at a particular 275 * location. 276 * 277 * @param insn - The instruction to be marked as having the prefix. 278 * @param prefix - The prefix that is present. 279 * @param location - The location where the prefix is located (in the address 280 * space of the instruction's reader). 281 */ 282static void setPrefixPresent(struct InternalInstruction* insn, 283 uint8_t prefix, 284 uint64_t location) 285{ 286 insn->prefixPresent[prefix] = 1; 287 insn->prefixLocations[prefix] = location; 288} 289 290/* 291 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is 292 * present at a given location. 293 * 294 * @param insn - The instruction to be queried. 295 * @param prefix - The prefix. 296 * @param location - The location to query. 297 * @return - Whether the prefix is at that location. 298 */ 299static BOOL isPrefixAtLocation(struct InternalInstruction* insn, 300 uint8_t prefix, 301 uint64_t location) 302{ 303 if (insn->prefixPresent[prefix] == 1 && 304 insn->prefixLocations[prefix] == location) 305 return TRUE; 306 else 307 return FALSE; 308} 309 310/* 311 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the 312 * instruction as having them. Also sets the instruction's default operand, 313 * address, and other relevant data sizes to report operands correctly. 314 * 315 * @param insn - The instruction whose prefixes are to be read. 316 * @return - 0 if the instruction could be read until the end of the prefix 317 * bytes, and no prefixes conflicted; nonzero otherwise. 318 */ 319static int readPrefixes(struct InternalInstruction* insn) { 320 BOOL isPrefix = TRUE; 321 BOOL prefixGroups[4] = { FALSE }; 322 uint64_t prefixLocation; 323 uint8_t byte = 0; 324 uint8_t nextByte; 325 326 BOOL hasAdSize = FALSE; 327 BOOL hasOpSize = FALSE; 328 329 dbgprintf(insn, "readPrefixes()"); 330 331 while (isPrefix) { 332 prefixLocation = insn->readerCursor; 333 334 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ 335 if (consumeByte(insn, &byte)) 336 break; 337 338 /* 339 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 340 * break and let it be disassembled as a normal "instruction". 341 */ 342 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) 343 break; 344 345 if (insn->readerCursor - 1 == insn->startLocation 346 && (byte == 0xf2 || byte == 0xf3) 347 && !lookAtByte(insn, &nextByte)) 348 { 349 /* 350 * If the byte is 0xf2 or 0xf3, and any of the following conditions are 351 * met: 352 * - it is followed by a LOCK (0xf0) prefix 353 * - it is followed by an xchg instruction 354 * then it should be disassembled as a xacquire/xrelease not repne/rep. 355 */ 356 if ((byte == 0xf2 || byte == 0xf3) && 357 ((nextByte == 0xf0) | 358 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) 359 insn->xAcquireRelease = TRUE; 360 /* 361 * Also if the byte is 0xf3, and the following condition is met: 362 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 363 * "mov mem, imm" (opcode 0xc6/0xc7) instructions. 364 * then it should be disassembled as an xrelease not rep. 365 */ 366 if (byte == 0xf3 && 367 (nextByte == 0x88 || nextByte == 0x89 || 368 nextByte == 0xc6 || nextByte == 0xc7)) 369 insn->xAcquireRelease = TRUE; 370 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { 371 if (consumeByte(insn, &nextByte)) 372 return -1; 373 if (lookAtByte(insn, &nextByte)) 374 return -1; 375 unconsumeByte(insn); 376 } 377 if (nextByte != 0x0f && nextByte != 0x90) 378 break; 379 } 380 381 switch (byte) { 382 case 0xf0: /* LOCK */ 383 case 0xf2: /* REPNE/REPNZ */ 384 case 0xf3: /* REP or REPE/REPZ */ 385 if (prefixGroups[0]) 386 dbgprintf(insn, "Redundant Group 1 prefix"); 387 prefixGroups[0] = TRUE; 388 setPrefixPresent(insn, byte, prefixLocation); 389 break; 390 case 0x2e: /* CS segment override -OR- Branch not taken */ 391 case 0x36: /* SS segment override -OR- Branch taken */ 392 case 0x3e: /* DS segment override */ 393 case 0x26: /* ES segment override */ 394 case 0x64: /* FS segment override */ 395 case 0x65: /* GS segment override */ 396 switch (byte) { 397 case 0x2e: 398 insn->segmentOverride = SEG_OVERRIDE_CS; 399 break; 400 case 0x36: 401 insn->segmentOverride = SEG_OVERRIDE_SS; 402 break; 403 case 0x3e: 404 insn->segmentOverride = SEG_OVERRIDE_DS; 405 break; 406 case 0x26: 407 insn->segmentOverride = SEG_OVERRIDE_ES; 408 break; 409 case 0x64: 410 insn->segmentOverride = SEG_OVERRIDE_FS; 411 break; 412 case 0x65: 413 insn->segmentOverride = SEG_OVERRIDE_GS; 414 break; 415 default: 416 debug("Unhandled override"); 417 return -1; 418 } 419 if (prefixGroups[1]) 420 dbgprintf(insn, "Redundant Group 2 prefix"); 421 prefixGroups[1] = TRUE; 422 setPrefixPresent(insn, byte, prefixLocation); 423 break; 424 case 0x66: /* Operand-size override */ 425 if (prefixGroups[2]) 426 dbgprintf(insn, "Redundant Group 3 prefix"); 427 prefixGroups[2] = TRUE; 428 hasOpSize = TRUE; 429 setPrefixPresent(insn, byte, prefixLocation); 430 break; 431 case 0x67: /* Address-size override */ 432 if (prefixGroups[3]) 433 dbgprintf(insn, "Redundant Group 4 prefix"); 434 prefixGroups[3] = TRUE; 435 hasAdSize = TRUE; 436 setPrefixPresent(insn, byte, prefixLocation); 437 break; 438 default: /* Not a prefix byte */ 439 isPrefix = FALSE; 440 break; 441 } 442 443 if (isPrefix) 444 dbgprintf(insn, "Found prefix 0x%hhx", byte); 445 } 446 447 insn->vexXopType = TYPE_NO_VEX_XOP; 448 449 if (byte == 0xc4) { 450 uint8_t byte1; 451 452 if (lookAtByte(insn, &byte1)) { 453 dbgprintf(insn, "Couldn't read second byte of VEX"); 454 return -1; 455 } 456 457 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 458 insn->vexXopType = TYPE_VEX_3B; 459 insn->necessaryPrefixLocation = insn->readerCursor - 1; 460 } 461 else { 462 unconsumeByte(insn); 463 insn->necessaryPrefixLocation = insn->readerCursor - 1; 464 } 465 466 if (insn->vexXopType == TYPE_VEX_3B) { 467 insn->vexXopPrefix[0] = byte; 468 consumeByte(insn, &insn->vexXopPrefix[1]); 469 consumeByte(insn, &insn->vexXopPrefix[2]); 470 471 /* We simulate the REX prefix for simplicity's sake */ 472 473 if (insn->mode == MODE_64BIT) { 474 insn->rexPrefix = 0x40 475 | (wFromVEX3of3(insn->vexXopPrefix[2]) << 3) 476 | (rFromVEX2of3(insn->vexXopPrefix[1]) << 2) 477 | (xFromVEX2of3(insn->vexXopPrefix[1]) << 1) 478 | (bFromVEX2of3(insn->vexXopPrefix[1]) << 0); 479 } 480 481 switch (ppFromVEX3of3(insn->vexXopPrefix[2])) 482 { 483 default: 484 break; 485 case VEX_PREFIX_66: 486 hasOpSize = TRUE; 487 break; 488 } 489 490 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 491 insn->vexXopPrefix[0], insn->vexXopPrefix[1], 492 insn->vexXopPrefix[2]); 493 } 494 } 495 else if (byte == 0xc5) { 496 uint8_t byte1; 497 498 if (lookAtByte(insn, &byte1)) { 499 dbgprintf(insn, "Couldn't read second byte of VEX"); 500 return -1; 501 } 502 503 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { 504 insn->vexXopType = TYPE_VEX_2B; 505 } 506 else { 507 unconsumeByte(insn); 508 } 509 510 if (insn->vexXopType == TYPE_VEX_2B) { 511 insn->vexXopPrefix[0] = byte; 512 consumeByte(insn, &insn->vexXopPrefix[1]); 513 514 if (insn->mode == MODE_64BIT) { 515 insn->rexPrefix = 0x40 516 | (rFromVEX2of2(insn->vexXopPrefix[1]) << 2); 517 } 518 519 switch (ppFromVEX2of2(insn->vexXopPrefix[1])) 520 { 521 default: 522 break; 523 case VEX_PREFIX_66: 524 hasOpSize = TRUE; 525 break; 526 } 527 528 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]); 529 } 530 } 531 else if (byte == 0x8f) { 532 uint8_t byte1; 533 534 if (lookAtByte(insn, &byte1)) { 535 dbgprintf(insn, "Couldn't read second byte of XOP"); 536 return -1; 537 } 538 539 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ 540 insn->vexXopType = TYPE_XOP; 541 insn->necessaryPrefixLocation = insn->readerCursor - 1; 542 } 543 else { 544 unconsumeByte(insn); 545 insn->necessaryPrefixLocation = insn->readerCursor - 1; 546 } 547 548 if (insn->vexXopType == TYPE_XOP) { 549 insn->vexXopPrefix[0] = byte; 550 consumeByte(insn, &insn->vexXopPrefix[1]); 551 consumeByte(insn, &insn->vexXopPrefix[2]); 552 553 /* We simulate the REX prefix for simplicity's sake */ 554 555 if (insn->mode == MODE_64BIT) { 556 insn->rexPrefix = 0x40 557 | (wFromXOP3of3(insn->vexXopPrefix[2]) << 3) 558 | (rFromXOP2of3(insn->vexXopPrefix[1]) << 2) 559 | (xFromXOP2of3(insn->vexXopPrefix[1]) << 1) 560 | (bFromXOP2of3(insn->vexXopPrefix[1]) << 0); 561 } 562 563 switch (ppFromXOP3of3(insn->vexXopPrefix[2])) 564 { 565 default: 566 break; 567 case VEX_PREFIX_66: 568 hasOpSize = TRUE; 569 break; 570 } 571 572 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 573 insn->vexXopPrefix[0], insn->vexXopPrefix[1], 574 insn->vexXopPrefix[2]); 575 } 576 } 577 else { 578 if (insn->mode == MODE_64BIT) { 579 if ((byte & 0xf0) == 0x40) { 580 uint8_t opcodeByte; 581 582 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { 583 dbgprintf(insn, "Redundant REX prefix"); 584 return -1; 585 } 586 587 insn->rexPrefix = byte; 588 insn->necessaryPrefixLocation = insn->readerCursor - 2; 589 590 dbgprintf(insn, "Found REX prefix 0x%hhx", byte); 591 } else { 592 unconsumeByte(insn); 593 insn->necessaryPrefixLocation = insn->readerCursor - 1; 594 } 595 } else { 596 unconsumeByte(insn); 597 insn->necessaryPrefixLocation = insn->readerCursor - 1; 598 } 599 } 600 601 if (insn->mode == MODE_16BIT) { 602 insn->registerSize = (hasOpSize ? 4 : 2); 603 insn->addressSize = (hasAdSize ? 4 : 2); 604 insn->displacementSize = (hasAdSize ? 4 : 2); 605 insn->immediateSize = (hasOpSize ? 4 : 2); 606 } else if (insn->mode == MODE_32BIT) { 607 insn->registerSize = (hasOpSize ? 2 : 4); 608 insn->addressSize = (hasAdSize ? 2 : 4); 609 insn->displacementSize = (hasAdSize ? 2 : 4); 610 insn->immediateSize = (hasOpSize ? 2 : 4); 611 } else if (insn->mode == MODE_64BIT) { 612 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 613 insn->registerSize = 8; 614 insn->addressSize = (hasAdSize ? 4 : 8); 615 insn->displacementSize = 4; 616 insn->immediateSize = 4; 617 } else if (insn->rexPrefix) { 618 insn->registerSize = (hasOpSize ? 2 : 4); 619 insn->addressSize = (hasAdSize ? 4 : 8); 620 insn->displacementSize = (hasOpSize ? 2 : 4); 621 insn->immediateSize = (hasOpSize ? 2 : 4); 622 } else { 623 insn->registerSize = (hasOpSize ? 2 : 4); 624 insn->addressSize = (hasAdSize ? 4 : 8); 625 insn->displacementSize = (hasOpSize ? 2 : 4); 626 insn->immediateSize = (hasOpSize ? 2 : 4); 627 } 628 } 629 630 return 0; 631} 632 633/* 634 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of 635 * extended or escape opcodes). 636 * 637 * @param insn - The instruction whose opcode is to be read. 638 * @return - 0 if the opcode could be read successfully; nonzero otherwise. 639 */ 640static int readOpcode(struct InternalInstruction* insn) { 641 /* Determine the length of the primary opcode */ 642 643 uint8_t current; 644 645 dbgprintf(insn, "readOpcode()"); 646 647 insn->opcodeType = ONEBYTE; 648 649 if (insn->vexXopType == TYPE_VEX_3B) 650 { 651 switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1])) 652 { 653 default: 654 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 655 mmmmmFromVEX2of3(insn->vexXopPrefix[1])); 656 return -1; 657 case VEX_LOB_0F: 658 insn->opcodeType = TWOBYTE; 659 return consumeByte(insn, &insn->opcode); 660 case VEX_LOB_0F38: 661 insn->opcodeType = THREEBYTE_38; 662 return consumeByte(insn, &insn->opcode); 663 case VEX_LOB_0F3A: 664 insn->opcodeType = THREEBYTE_3A; 665 return consumeByte(insn, &insn->opcode); 666 } 667 } 668 else if (insn->vexXopType == TYPE_VEX_2B) 669 { 670 insn->opcodeType = TWOBYTE; 671 return consumeByte(insn, &insn->opcode); 672 } 673 else if (insn->vexXopType == TYPE_XOP) 674 { 675 switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1])) 676 { 677 default: 678 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", 679 mmmmmFromVEX2of3(insn->vexXopPrefix[1])); 680 return -1; 681 case XOP_MAP_SELECT_8: 682 insn->opcodeType = XOP8_MAP; 683 return consumeByte(insn, &insn->opcode); 684 case XOP_MAP_SELECT_9: 685 insn->opcodeType = XOP9_MAP; 686 return consumeByte(insn, &insn->opcode); 687 case XOP_MAP_SELECT_A: 688 insn->opcodeType = XOPA_MAP; 689 return consumeByte(insn, &insn->opcode); 690 } 691 } 692 693 if (consumeByte(insn, ¤t)) 694 return -1; 695 696 if (current == 0x0f) { 697 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); 698 699 if (consumeByte(insn, ¤t)) 700 return -1; 701 702 if (current == 0x38) { 703 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 704 705 if (consumeByte(insn, ¤t)) 706 return -1; 707 708 insn->opcodeType = THREEBYTE_38; 709 } else if (current == 0x3a) { 710 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 711 712 if (consumeByte(insn, ¤t)) 713 return -1; 714 715 insn->opcodeType = THREEBYTE_3A; 716 } else if (current == 0xa6) { 717 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 718 719 if (consumeByte(insn, ¤t)) 720 return -1; 721 722 insn->opcodeType = THREEBYTE_A6; 723 } else if (current == 0xa7) { 724 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); 725 726 if (consumeByte(insn, ¤t)) 727 return -1; 728 729 insn->opcodeType = THREEBYTE_A7; 730 } else { 731 dbgprintf(insn, "Didn't find a three-byte escape prefix"); 732 733 insn->opcodeType = TWOBYTE; 734 } 735 } 736 737 /* 738 * At this point we have consumed the full opcode. 739 * Anything we consume from here on must be unconsumed. 740 */ 741 742 insn->opcode = current; 743 744 return 0; 745} 746 747static int readModRM(struct InternalInstruction* insn); 748 749/* 750 * getIDWithAttrMask - Determines the ID of an instruction, consuming 751 * the ModR/M byte as appropriate for extended and escape opcodes, 752 * and using a supplied attribute mask. 753 * 754 * @param instructionID - A pointer whose target is filled in with the ID of the 755 * instruction. 756 * @param insn - The instruction whose ID is to be determined. 757 * @param attrMask - The attribute mask to search. 758 * @return - 0 if the ModR/M could be read when needed or was not 759 * needed; nonzero otherwise. 760 */ 761static int getIDWithAttrMask(uint16_t* instructionID, 762 struct InternalInstruction* insn, 763 uint8_t attrMask) { 764 BOOL hasModRMExtension; 765 766 uint8_t instructionClass; 767 768 instructionClass = contextForAttrs(attrMask); 769 770 hasModRMExtension = modRMRequired(insn->opcodeType, 771 instructionClass, 772 insn->opcode); 773 774 if (hasModRMExtension) { 775 if (readModRM(insn)) 776 return -1; 777 778 *instructionID = decode(insn->opcodeType, 779 instructionClass, 780 insn->opcode, 781 insn->modRM); 782 } else { 783 *instructionID = decode(insn->opcodeType, 784 instructionClass, 785 insn->opcode, 786 0); 787 } 788 789 return 0; 790} 791 792/* 793 * is16BitEquivalent - Determines whether two instruction names refer to 794 * equivalent instructions but one is 16-bit whereas the other is not. 795 * 796 * @param orig - The instruction that is not 16-bit 797 * @param equiv - The instruction that is 16-bit 798 */ 799static BOOL is16BitEquivalent(const char* orig, const char* equiv) { 800 off_t i; 801 802 for (i = 0;; i++) { 803 if (orig[i] == '\0' && equiv[i] == '\0') 804 return TRUE; 805 if (orig[i] == '\0' || equiv[i] == '\0') 806 return FALSE; 807 if (orig[i] != equiv[i]) { 808 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 809 continue; 810 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 811 continue; 812 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 813 continue; 814 return FALSE; 815 } 816 } 817} 818 819/* 820 * getID - Determines the ID of an instruction, consuming the ModR/M byte as 821 * appropriate for extended and escape opcodes. Determines the attributes and 822 * context for the instruction before doing so. 823 * 824 * @param insn - The instruction whose ID is to be determined. 825 * @return - 0 if the ModR/M could be read when needed or was not needed; 826 * nonzero otherwise. 827 */ 828static int getID(struct InternalInstruction* insn, const void *miiArg) { 829 uint8_t attrMask; 830 uint16_t instructionID; 831 832 dbgprintf(insn, "getID()"); 833 834 attrMask = ATTR_NONE; 835 836 if (insn->mode == MODE_64BIT) 837 attrMask |= ATTR_64BIT; 838 839 if (insn->vexXopType != TYPE_NO_VEX_XOP) { 840 attrMask |= ATTR_VEX; 841 842 if (insn->vexXopType == TYPE_VEX_3B) { 843 switch (ppFromVEX3of3(insn->vexXopPrefix[2])) { 844 case VEX_PREFIX_66: 845 attrMask |= ATTR_OPSIZE; 846 break; 847 case VEX_PREFIX_F3: 848 attrMask |= ATTR_XS; 849 break; 850 case VEX_PREFIX_F2: 851 attrMask |= ATTR_XD; 852 break; 853 } 854 855 if (lFromVEX3of3(insn->vexXopPrefix[2])) 856 attrMask |= ATTR_VEXL; 857 } 858 else if (insn->vexXopType == TYPE_VEX_2B) { 859 switch (ppFromVEX2of2(insn->vexXopPrefix[1])) { 860 case VEX_PREFIX_66: 861 attrMask |= ATTR_OPSIZE; 862 break; 863 case VEX_PREFIX_F3: 864 attrMask |= ATTR_XS; 865 break; 866 case VEX_PREFIX_F2: 867 attrMask |= ATTR_XD; 868 break; 869 } 870 871 if (lFromVEX2of2(insn->vexXopPrefix[1])) 872 attrMask |= ATTR_VEXL; 873 } 874 else if (insn->vexXopType == TYPE_XOP) { 875 switch (ppFromXOP3of3(insn->vexXopPrefix[2])) { 876 case VEX_PREFIX_66: 877 attrMask |= ATTR_OPSIZE; 878 break; 879 case VEX_PREFIX_F3: 880 attrMask |= ATTR_XS; 881 break; 882 case VEX_PREFIX_F2: 883 attrMask |= ATTR_XD; 884 break; 885 } 886 887 if (lFromXOP3of3(insn->vexXopPrefix[2])) 888 attrMask |= ATTR_VEXL; 889 } 890 else { 891 return -1; 892 } 893 } 894 else { 895 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) 896 attrMask |= ATTR_OPSIZE; 897 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) 898 attrMask |= ATTR_ADSIZE; 899 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) 900 attrMask |= ATTR_XS; 901 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) 902 attrMask |= ATTR_XD; 903 } 904 905 if (insn->rexPrefix & 0x08) 906 attrMask |= ATTR_REXW; 907 908 if (getIDWithAttrMask(&instructionID, insn, attrMask)) 909 return -1; 910 911 /* The following clauses compensate for limitations of the tables. */ 912 913 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { 914 /* 915 * The instruction tables make no distinction between instructions that 916 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 917 * particular spot (i.e., many MMX operations). In general we're 918 * conservative, but in the specific case where OpSize is present but not 919 * in the right place we check if there's a 16-bit operation. 920 */ 921 922 const struct InstructionSpecifier *spec; 923 uint16_t instructionIDWithOpsize; 924 const char *specName, *specWithOpSizeName; 925 926 spec = specifierForUID(instructionID); 927 928 if (getIDWithAttrMask(&instructionIDWithOpsize, 929 insn, 930 attrMask | ATTR_OPSIZE)) { 931 /* 932 * ModRM required with OpSize but not present; give up and return version 933 * without OpSize set 934 */ 935 936 insn->instructionID = instructionID; 937 insn->spec = spec; 938 return 0; 939 } 940 941 specName = x86DisassemblerGetInstrName(instructionID, miiArg); 942 specWithOpSizeName = 943 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); 944 945 if (is16BitEquivalent(specName, specWithOpSizeName)) { 946 insn->instructionID = instructionIDWithOpsize; 947 insn->spec = specifierForUID(instructionIDWithOpsize); 948 } else { 949 insn->instructionID = instructionID; 950 insn->spec = spec; 951 } 952 return 0; 953 } 954 955 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 956 insn->rexPrefix & 0x01) { 957 /* 958 * NOOP shouldn't decode as NOOP if REX.b is set. Instead 959 * it should decode as XCHG %r8, %eax. 960 */ 961 962 const struct InstructionSpecifier *spec; 963 uint16_t instructionIDWithNewOpcode; 964 const struct InstructionSpecifier *specWithNewOpcode; 965 966 spec = specifierForUID(instructionID); 967 968 /* Borrow opcode from one of the other XCHGar opcodes */ 969 insn->opcode = 0x91; 970 971 if (getIDWithAttrMask(&instructionIDWithNewOpcode, 972 insn, 973 attrMask)) { 974 insn->opcode = 0x90; 975 976 insn->instructionID = instructionID; 977 insn->spec = spec; 978 return 0; 979 } 980 981 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); 982 983 /* Change back */ 984 insn->opcode = 0x90; 985 986 insn->instructionID = instructionIDWithNewOpcode; 987 insn->spec = specWithNewOpcode; 988 989 return 0; 990 } 991 992 insn->instructionID = instructionID; 993 insn->spec = specifierForUID(insn->instructionID); 994 995 return 0; 996} 997 998/* 999 * readSIB - Consumes the SIB byte to determine addressing information for an 1000 * instruction. 1001 * 1002 * @param insn - The instruction whose SIB byte is to be read. 1003 * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 1004 */ 1005static int readSIB(struct InternalInstruction* insn) { 1006 SIBIndex sibIndexBase = 0; 1007 SIBBase sibBaseBase = 0; 1008 uint8_t index, base; 1009 1010 dbgprintf(insn, "readSIB()"); 1011 1012 if (insn->consumedSIB) 1013 return 0; 1014 1015 insn->consumedSIB = TRUE; 1016 1017 switch (insn->addressSize) { 1018 case 2: 1019 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); 1020 return -1; 1021 break; 1022 case 4: 1023 sibIndexBase = SIB_INDEX_EAX; 1024 sibBaseBase = SIB_BASE_EAX; 1025 break; 1026 case 8: 1027 sibIndexBase = SIB_INDEX_RAX; 1028 sibBaseBase = SIB_BASE_RAX; 1029 break; 1030 } 1031 1032 if (consumeByte(insn, &insn->sib)) 1033 return -1; 1034 1035 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 1036 1037 switch (index) { 1038 case 0x4: 1039 insn->sibIndex = SIB_INDEX_NONE; 1040 break; 1041 default: 1042 insn->sibIndex = (SIBIndex)(sibIndexBase + index); 1043 if (insn->sibIndex == SIB_INDEX_sib || 1044 insn->sibIndex == SIB_INDEX_sib64) 1045 insn->sibIndex = SIB_INDEX_NONE; 1046 break; 1047 } 1048 1049 switch (scaleFromSIB(insn->sib)) { 1050 case 0: 1051 insn->sibScale = 1; 1052 break; 1053 case 1: 1054 insn->sibScale = 2; 1055 break; 1056 case 2: 1057 insn->sibScale = 4; 1058 break; 1059 case 3: 1060 insn->sibScale = 8; 1061 break; 1062 } 1063 1064 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 1065 1066 switch (base) { 1067 case 0x5:
|
1082 break; 1083 case 0x3: 1084 debug("Cannot have Mod = 0b11 and a SIB byte"); 1085 return -1; 1086 } 1087 break; 1088 default: 1089 insn->sibBase = (SIBBase)(sibBaseBase + base); 1090 break; 1091 } 1092 1093 return 0; 1094} 1095 1096/* 1097 * readDisplacement - Consumes the displacement of an instruction. 1098 * 1099 * @param insn - The instruction whose displacement is to be read. 1100 * @return - 0 if the displacement byte was successfully read; nonzero 1101 * otherwise. 1102 */ 1103static int readDisplacement(struct InternalInstruction* insn) { 1104 int8_t d8; 1105 int16_t d16; 1106 int32_t d32; 1107 1108 dbgprintf(insn, "readDisplacement()"); 1109 1110 if (insn->consumedDisplacement) 1111 return 0; 1112 1113 insn->consumedDisplacement = TRUE; 1114 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1115 1116 switch (insn->eaDisplacement) { 1117 case EA_DISP_NONE: 1118 insn->consumedDisplacement = FALSE; 1119 break; 1120 case EA_DISP_8: 1121 if (consumeInt8(insn, &d8)) 1122 return -1; 1123 insn->displacement = d8; 1124 break; 1125 case EA_DISP_16: 1126 if (consumeInt16(insn, &d16)) 1127 return -1; 1128 insn->displacement = d16; 1129 break; 1130 case EA_DISP_32: 1131 if (consumeInt32(insn, &d32)) 1132 return -1; 1133 insn->displacement = d32; 1134 break; 1135 } 1136 1137 insn->consumedDisplacement = TRUE; 1138 return 0; 1139} 1140 1141/* 1142 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1143 * displacement) for an instruction and interprets it. 1144 * 1145 * @param insn - The instruction whose addressing information is to be read. 1146 * @return - 0 if the information was successfully read; nonzero otherwise. 1147 */ 1148static int readModRM(struct InternalInstruction* insn) { 1149 uint8_t mod, rm, reg; 1150 1151 dbgprintf(insn, "readModRM()"); 1152 1153 if (insn->consumedModRM) 1154 return 0; 1155 1156 if (consumeByte(insn, &insn->modRM)) 1157 return -1; 1158 insn->consumedModRM = TRUE; 1159 1160 mod = modFromModRM(insn->modRM); 1161 rm = rmFromModRM(insn->modRM); 1162 reg = regFromModRM(insn->modRM); 1163 1164 /* 1165 * This goes by insn->registerSize to pick the correct register, which messes 1166 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1167 * fixupReg(). 1168 */ 1169 switch (insn->registerSize) { 1170 case 2: 1171 insn->regBase = MODRM_REG_AX; 1172 insn->eaRegBase = EA_REG_AX; 1173 break; 1174 case 4: 1175 insn->regBase = MODRM_REG_EAX; 1176 insn->eaRegBase = EA_REG_EAX; 1177 break; 1178 case 8: 1179 insn->regBase = MODRM_REG_RAX; 1180 insn->eaRegBase = EA_REG_RAX; 1181 break; 1182 } 1183 1184 reg |= rFromREX(insn->rexPrefix) << 3; 1185 rm |= bFromREX(insn->rexPrefix) << 3; 1186 1187 insn->reg = (Reg)(insn->regBase + reg); 1188 1189 switch (insn->addressSize) { 1190 case 2: 1191 insn->eaBaseBase = EA_BASE_BX_SI; 1192 1193 switch (mod) { 1194 case 0x0: 1195 if (rm == 0x6) { 1196 insn->eaBase = EA_BASE_NONE; 1197 insn->eaDisplacement = EA_DISP_16; 1198 if (readDisplacement(insn)) 1199 return -1; 1200 } else { 1201 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1202 insn->eaDisplacement = EA_DISP_NONE; 1203 } 1204 break; 1205 case 0x1: 1206 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1207 insn->eaDisplacement = EA_DISP_8; 1208 if (readDisplacement(insn)) 1209 return -1; 1210 break; 1211 case 0x2: 1212 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1213 insn->eaDisplacement = EA_DISP_16; 1214 if (readDisplacement(insn)) 1215 return -1; 1216 break; 1217 case 0x3: 1218 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1219 if (readDisplacement(insn)) 1220 return -1; 1221 break; 1222 } 1223 break; 1224 case 4: 1225 case 8: 1226 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1227 1228 switch (mod) { 1229 case 0x0: 1230 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1231 switch (rm) { 1232 case 0x4: 1233 case 0xc: /* in case REXW.b is set */ 1234 insn->eaBase = (insn->addressSize == 4 ? 1235 EA_BASE_sib : EA_BASE_sib64); 1236 readSIB(insn); 1237 if (readDisplacement(insn)) 1238 return -1; 1239 break; 1240 case 0x5: 1241 insn->eaBase = EA_BASE_NONE; 1242 insn->eaDisplacement = EA_DISP_32; 1243 if (readDisplacement(insn)) 1244 return -1; 1245 break; 1246 default: 1247 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1248 break; 1249 } 1250 break; 1251 case 0x1: 1252 case 0x2: 1253 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1254 switch (rm) { 1255 case 0x4: 1256 case 0xc: /* in case REXW.b is set */ 1257 insn->eaBase = EA_BASE_sib; 1258 readSIB(insn); 1259 if (readDisplacement(insn)) 1260 return -1; 1261 break; 1262 default: 1263 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1264 if (readDisplacement(insn)) 1265 return -1; 1266 break; 1267 } 1268 break; 1269 case 0x3: 1270 insn->eaDisplacement = EA_DISP_NONE; 1271 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1272 break; 1273 } 1274 break; 1275 } /* switch (insn->addressSize) */ 1276 1277 return 0; 1278} 1279 1280#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1281 static uint8_t name(struct InternalInstruction *insn, \ 1282 OperandType type, \ 1283 uint8_t index, \ 1284 uint8_t *valid) { \ 1285 *valid = 1; \ 1286 switch (type) { \ 1287 default: \ 1288 debug("Unhandled register type"); \ 1289 *valid = 0; \ 1290 return 0; \ 1291 case TYPE_Rv: \ 1292 return base + index; \ 1293 case TYPE_R8: \ 1294 if (insn->rexPrefix && \ 1295 index >= 4 && index <= 7) { \ 1296 return prefix##_SPL + (index - 4); \ 1297 } else { \ 1298 return prefix##_AL + index; \ 1299 } \ 1300 case TYPE_R16: \ 1301 return prefix##_AX + index; \ 1302 case TYPE_R32: \ 1303 return prefix##_EAX + index; \ 1304 case TYPE_R64: \ 1305 return prefix##_RAX + index; \ 1306 case TYPE_XMM512: \ 1307 return prefix##_ZMM0 + index; \ 1308 case TYPE_XMM256: \ 1309 return prefix##_YMM0 + index; \ 1310 case TYPE_XMM128: \ 1311 case TYPE_XMM64: \ 1312 case TYPE_XMM32: \ 1313 case TYPE_XMM: \ 1314 return prefix##_XMM0 + index; \ 1315 case TYPE_MM64: \ 1316 case TYPE_MM32: \ 1317 case TYPE_MM: \ 1318 if (index > 7) \ 1319 *valid = 0; \ 1320 return prefix##_MM0 + index; \ 1321 case TYPE_SEGMENTREG: \ 1322 if (index > 5) \ 1323 *valid = 0; \ 1324 return prefix##_ES + index; \ 1325 case TYPE_DEBUGREG: \ 1326 if (index > 7) \ 1327 *valid = 0; \ 1328 return prefix##_DR0 + index; \ 1329 case TYPE_CONTROLREG: \ 1330 if (index > 8) \ 1331 *valid = 0; \ 1332 return prefix##_CR0 + index; \ 1333 } \ 1334 } 1335 1336/* 1337 * fixup*Value - Consults an operand type to determine the meaning of the 1338 * reg or R/M field. If the operand is an XMM operand, for example, an 1339 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1340 * misinterpret it as. 1341 * 1342 * @param insn - The instruction containing the operand. 1343 * @param type - The operand type. 1344 * @param index - The existing value of the field as reported by readModRM(). 1345 * @param valid - The address of a uint8_t. The target is set to 1 if the 1346 * field is valid for the register class; 0 if not. 1347 * @return - The proper value. 1348 */ 1349GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1350GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1351 1352/* 1353 * fixupReg - Consults an operand specifier to determine which of the 1354 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1355 * 1356 * @param insn - See fixup*Value(). 1357 * @param op - The operand specifier. 1358 * @return - 0 if fixup was successful; -1 if the register returned was 1359 * invalid for its class. 1360 */ 1361static int fixupReg(struct InternalInstruction *insn, 1362 const struct OperandSpecifier *op) { 1363 uint8_t valid; 1364 1365 dbgprintf(insn, "fixupReg()"); 1366 1367 switch ((OperandEncoding)op->encoding) { 1368 default: 1369 debug("Expected a REG or R/M encoding in fixupReg"); 1370 return -1; 1371 case ENCODING_VVVV: 1372 insn->vvvv = (Reg)fixupRegValue(insn, 1373 (OperandType)op->type, 1374 insn->vvvv, 1375 &valid); 1376 if (!valid) 1377 return -1; 1378 break; 1379 case ENCODING_REG: 1380 insn->reg = (Reg)fixupRegValue(insn, 1381 (OperandType)op->type, 1382 insn->reg - insn->regBase, 1383 &valid); 1384 if (!valid) 1385 return -1; 1386 break; 1387 case ENCODING_RM: 1388 if (insn->eaBase >= insn->eaRegBase) { 1389 insn->eaBase = (EABase)fixupRMValue(insn, 1390 (OperandType)op->type, 1391 insn->eaBase - insn->eaRegBase, 1392 &valid); 1393 if (!valid) 1394 return -1; 1395 } 1396 break; 1397 } 1398 1399 return 0; 1400} 1401 1402/* 1403 * readOpcodeModifier - Reads an operand from the opcode field of an 1404 * instruction. Handles AddRegFrm instructions. 1405 * 1406 * @param insn - The instruction whose opcode field is to be read. 1407 * @param inModRM - Indicates that the opcode field is to be read from the 1408 * ModR/M extension; useful for escape opcodes 1409 * @return - 0 on success; nonzero otherwise. 1410 */ 1411static int readOpcodeModifier(struct InternalInstruction* insn) { 1412 dbgprintf(insn, "readOpcodeModifier()"); 1413 1414 if (insn->consumedOpcodeModifier) 1415 return 0; 1416 1417 insn->consumedOpcodeModifier = TRUE; 1418 1419 switch (insn->spec->modifierType) { 1420 default: 1421 debug("Unknown modifier type."); 1422 return -1; 1423 case MODIFIER_NONE: 1424 debug("No modifier but an operand expects one."); 1425 return -1; 1426 case MODIFIER_OPCODE: 1427 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1428 return 0; 1429 case MODIFIER_MODRM: 1430 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1431 return 0; 1432 } 1433} 1434 1435/* 1436 * readOpcodeRegister - Reads an operand from the opcode field of an 1437 * instruction and interprets it appropriately given the operand width. 1438 * Handles AddRegFrm instructions. 1439 * 1440 * @param insn - See readOpcodeModifier(). 1441 * @param size - The width (in bytes) of the register being specified. 1442 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1443 * RAX. 1444 * @return - 0 on success; nonzero otherwise. 1445 */ 1446static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1447 dbgprintf(insn, "readOpcodeRegister()"); 1448 1449 if (readOpcodeModifier(insn)) 1450 return -1; 1451 1452 if (size == 0) 1453 size = insn->registerSize; 1454 1455 switch (size) { 1456 case 1: 1457 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1458 | insn->opcodeModifier)); 1459 if (insn->rexPrefix && 1460 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1461 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1462 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1463 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1464 } 1465 1466 break; 1467 case 2: 1468 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1469 + ((bFromREX(insn->rexPrefix) << 3) 1470 | insn->opcodeModifier)); 1471 break; 1472 case 4: 1473 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1474 + ((bFromREX(insn->rexPrefix) << 3) 1475 | insn->opcodeModifier)); 1476 break; 1477 case 8: 1478 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1479 + ((bFromREX(insn->rexPrefix) << 3) 1480 | insn->opcodeModifier)); 1481 break; 1482 } 1483 1484 return 0; 1485} 1486 1487/* 1488 * readImmediate - Consumes an immediate operand from an instruction, given the 1489 * desired operand size. 1490 * 1491 * @param insn - The instruction whose operand is to be read. 1492 * @param size - The width (in bytes) of the operand. 1493 * @return - 0 if the immediate was successfully consumed; nonzero 1494 * otherwise. 1495 */ 1496static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1497 uint8_t imm8; 1498 uint16_t imm16; 1499 uint32_t imm32; 1500 uint64_t imm64; 1501 1502 dbgprintf(insn, "readImmediate()"); 1503 1504 if (insn->numImmediatesConsumed == 2) { 1505 debug("Already consumed two immediates"); 1506 return -1; 1507 } 1508 1509 if (size == 0) 1510 size = insn->immediateSize; 1511 else 1512 insn->immediateSize = size; 1513 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1514 1515 switch (size) { 1516 case 1: 1517 if (consumeByte(insn, &imm8)) 1518 return -1; 1519 insn->immediates[insn->numImmediatesConsumed] = imm8; 1520 break; 1521 case 2: 1522 if (consumeUInt16(insn, &imm16)) 1523 return -1; 1524 insn->immediates[insn->numImmediatesConsumed] = imm16; 1525 break; 1526 case 4: 1527 if (consumeUInt32(insn, &imm32)) 1528 return -1; 1529 insn->immediates[insn->numImmediatesConsumed] = imm32; 1530 break; 1531 case 8: 1532 if (consumeUInt64(insn, &imm64)) 1533 return -1; 1534 insn->immediates[insn->numImmediatesConsumed] = imm64; 1535 break; 1536 } 1537 1538 insn->numImmediatesConsumed++; 1539 1540 return 0; 1541} 1542 1543/* 1544 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1545 * 1546 * @param insn - The instruction whose operand is to be read. 1547 * @return - 0 if the vvvv was successfully consumed; nonzero 1548 * otherwise. 1549 */ 1550static int readVVVV(struct InternalInstruction* insn) { 1551 dbgprintf(insn, "readVVVV()"); 1552 1553 if (insn->vexXopType == TYPE_VEX_3B) 1554 insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]); 1555 else if (insn->vexXopType == TYPE_VEX_2B) 1556 insn->vvvv = vvvvFromVEX2of2(insn->vexXopPrefix[1]); 1557 else if (insn->vexXopType == TYPE_XOP) 1558 insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]); 1559 else 1560 return -1; 1561 1562 if (insn->mode != MODE_64BIT) 1563 insn->vvvv &= 0x7; 1564 1565 return 0; 1566} 1567 1568/* 1569 * readOperands - Consults the specifier for an instruction and consumes all 1570 * operands for that instruction, interpreting them as it goes. 1571 * 1572 * @param insn - The instruction whose operands are to be read and interpreted. 1573 * @return - 0 if all operands could be read; nonzero otherwise. 1574 */ 1575static int readOperands(struct InternalInstruction* insn) { 1576 int index; 1577 int hasVVVV, needVVVV; 1578 int sawRegImm = 0; 1579 1580 dbgprintf(insn, "readOperands()"); 1581 1582 /* If non-zero vvvv specified, need to make sure one of the operands 1583 uses it. */ 1584 hasVVVV = !readVVVV(insn); 1585 needVVVV = hasVVVV && (insn->vvvv != 0); 1586 1587 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1588 switch (x86OperandSets[insn->spec->operands][index].encoding) { 1589 case ENCODING_NONE: 1590 break; 1591 case ENCODING_REG: 1592 case ENCODING_RM: 1593 if (readModRM(insn)) 1594 return -1; 1595 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1596 return -1; 1597 break; 1598 case ENCODING_CB: 1599 case ENCODING_CW: 1600 case ENCODING_CD: 1601 case ENCODING_CP: 1602 case ENCODING_CO: 1603 case ENCODING_CT: 1604 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1605 return -1; 1606 case ENCODING_IB: 1607 if (sawRegImm) { 1608 /* Saw a register immediate so don't read again and instead split the 1609 previous immediate. FIXME: This is a hack. */ 1610 insn->immediates[insn->numImmediatesConsumed] = 1611 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1612 ++insn->numImmediatesConsumed; 1613 break; 1614 } 1615 if (readImmediate(insn, 1)) 1616 return -1; 1617 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1618 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1619 return -1; 1620 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1621 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1622 return -1; 1623 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1624 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1625 sawRegImm = 1; 1626 break; 1627 case ENCODING_IW: 1628 if (readImmediate(insn, 2)) 1629 return -1; 1630 break; 1631 case ENCODING_ID: 1632 if (readImmediate(insn, 4)) 1633 return -1; 1634 break; 1635 case ENCODING_IO: 1636 if (readImmediate(insn, 8)) 1637 return -1; 1638 break; 1639 case ENCODING_Iv: 1640 if (readImmediate(insn, insn->immediateSize)) 1641 return -1; 1642 break; 1643 case ENCODING_Ia: 1644 if (readImmediate(insn, insn->addressSize)) 1645 return -1; 1646 break; 1647 case ENCODING_RB: 1648 if (readOpcodeRegister(insn, 1)) 1649 return -1; 1650 break; 1651 case ENCODING_RW: 1652 if (readOpcodeRegister(insn, 2)) 1653 return -1; 1654 break; 1655 case ENCODING_RD: 1656 if (readOpcodeRegister(insn, 4)) 1657 return -1; 1658 break; 1659 case ENCODING_RO: 1660 if (readOpcodeRegister(insn, 8)) 1661 return -1; 1662 break; 1663 case ENCODING_Rv: 1664 if (readOpcodeRegister(insn, 0)) 1665 return -1; 1666 break; 1667 case ENCODING_I: 1668 if (readOpcodeModifier(insn)) 1669 return -1; 1670 break; 1671 case ENCODING_VVVV: 1672 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1673 if (!hasVVVV) 1674 return -1; 1675 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1676 return -1; 1677 break; 1678 case ENCODING_DUP: 1679 break; 1680 default: 1681 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1682 return -1; 1683 } 1684 } 1685 1686 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1687 if (needVVVV) return -1; 1688 1689 return 0; 1690} 1691 1692/* 1693 * decodeInstruction - Reads and interprets a full instruction provided by the 1694 * user. 1695 * 1696 * @param insn - A pointer to the instruction to be populated. Must be 1697 * pre-allocated. 1698 * @param reader - The function to be used to read the instruction's bytes. 1699 * @param readerArg - A generic argument to be passed to the reader to store 1700 * any internal state. 1701 * @param logger - If non-NULL, the function to be used to write log messages 1702 * and warnings. 1703 * @param loggerArg - A generic argument to be passed to the logger to store 1704 * any internal state. 1705 * @param startLoc - The address (in the reader's address space) of the first 1706 * byte in the instruction. 1707 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1708 * decode the instruction in. 1709 * @return - 0 if the instruction's memory could be read; nonzero if 1710 * not. 1711 */ 1712int decodeInstruction(struct InternalInstruction* insn, 1713 byteReader_t reader, 1714 const void* readerArg, 1715 dlog_t logger, 1716 void* loggerArg, 1717 const void* miiArg, 1718 uint64_t startLoc, 1719 DisassemblerMode mode) { 1720 memset(insn, 0, sizeof(struct InternalInstruction)); 1721 1722 insn->reader = reader; 1723 insn->readerArg = readerArg; 1724 insn->dlog = logger; 1725 insn->dlogArg = loggerArg; 1726 insn->startLocation = startLoc; 1727 insn->readerCursor = startLoc; 1728 insn->mode = mode; 1729 insn->numImmediatesConsumed = 0; 1730 1731 if (readPrefixes(insn) || 1732 readOpcode(insn) || 1733 getID(insn, miiArg) || 1734 insn->instructionID == 0 || 1735 readOperands(insn)) 1736 return -1; 1737 1738 insn->operands = &x86OperandSets[insn->spec->operands][0]; 1739 1740 insn->length = insn->readerCursor - insn->startLocation; 1741 1742 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1743 startLoc, insn->readerCursor, insn->length); 1744 1745 if (insn->length > 15) 1746 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1747 1748 return 0; 1749}
| 1081 break; 1082 case 0x3: 1083 debug("Cannot have Mod = 0b11 and a SIB byte"); 1084 return -1; 1085 } 1086 break; 1087 default: 1088 insn->sibBase = (SIBBase)(sibBaseBase + base); 1089 break; 1090 } 1091 1092 return 0; 1093} 1094 1095/* 1096 * readDisplacement - Consumes the displacement of an instruction. 1097 * 1098 * @param insn - The instruction whose displacement is to be read. 1099 * @return - 0 if the displacement byte was successfully read; nonzero 1100 * otherwise. 1101 */ 1102static int readDisplacement(struct InternalInstruction* insn) { 1103 int8_t d8; 1104 int16_t d16; 1105 int32_t d32; 1106 1107 dbgprintf(insn, "readDisplacement()"); 1108 1109 if (insn->consumedDisplacement) 1110 return 0; 1111 1112 insn->consumedDisplacement = TRUE; 1113 insn->displacementOffset = insn->readerCursor - insn->startLocation; 1114 1115 switch (insn->eaDisplacement) { 1116 case EA_DISP_NONE: 1117 insn->consumedDisplacement = FALSE; 1118 break; 1119 case EA_DISP_8: 1120 if (consumeInt8(insn, &d8)) 1121 return -1; 1122 insn->displacement = d8; 1123 break; 1124 case EA_DISP_16: 1125 if (consumeInt16(insn, &d16)) 1126 return -1; 1127 insn->displacement = d16; 1128 break; 1129 case EA_DISP_32: 1130 if (consumeInt32(insn, &d32)) 1131 return -1; 1132 insn->displacement = d32; 1133 break; 1134 } 1135 1136 insn->consumedDisplacement = TRUE; 1137 return 0; 1138} 1139 1140/* 1141 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and 1142 * displacement) for an instruction and interprets it. 1143 * 1144 * @param insn - The instruction whose addressing information is to be read. 1145 * @return - 0 if the information was successfully read; nonzero otherwise. 1146 */ 1147static int readModRM(struct InternalInstruction* insn) { 1148 uint8_t mod, rm, reg; 1149 1150 dbgprintf(insn, "readModRM()"); 1151 1152 if (insn->consumedModRM) 1153 return 0; 1154 1155 if (consumeByte(insn, &insn->modRM)) 1156 return -1; 1157 insn->consumedModRM = TRUE; 1158 1159 mod = modFromModRM(insn->modRM); 1160 rm = rmFromModRM(insn->modRM); 1161 reg = regFromModRM(insn->modRM); 1162 1163 /* 1164 * This goes by insn->registerSize to pick the correct register, which messes 1165 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in 1166 * fixupReg(). 1167 */ 1168 switch (insn->registerSize) { 1169 case 2: 1170 insn->regBase = MODRM_REG_AX; 1171 insn->eaRegBase = EA_REG_AX; 1172 break; 1173 case 4: 1174 insn->regBase = MODRM_REG_EAX; 1175 insn->eaRegBase = EA_REG_EAX; 1176 break; 1177 case 8: 1178 insn->regBase = MODRM_REG_RAX; 1179 insn->eaRegBase = EA_REG_RAX; 1180 break; 1181 } 1182 1183 reg |= rFromREX(insn->rexPrefix) << 3; 1184 rm |= bFromREX(insn->rexPrefix) << 3; 1185 1186 insn->reg = (Reg)(insn->regBase + reg); 1187 1188 switch (insn->addressSize) { 1189 case 2: 1190 insn->eaBaseBase = EA_BASE_BX_SI; 1191 1192 switch (mod) { 1193 case 0x0: 1194 if (rm == 0x6) { 1195 insn->eaBase = EA_BASE_NONE; 1196 insn->eaDisplacement = EA_DISP_16; 1197 if (readDisplacement(insn)) 1198 return -1; 1199 } else { 1200 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1201 insn->eaDisplacement = EA_DISP_NONE; 1202 } 1203 break; 1204 case 0x1: 1205 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1206 insn->eaDisplacement = EA_DISP_8; 1207 if (readDisplacement(insn)) 1208 return -1; 1209 break; 1210 case 0x2: 1211 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1212 insn->eaDisplacement = EA_DISP_16; 1213 if (readDisplacement(insn)) 1214 return -1; 1215 break; 1216 case 0x3: 1217 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1218 if (readDisplacement(insn)) 1219 return -1; 1220 break; 1221 } 1222 break; 1223 case 4: 1224 case 8: 1225 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 1226 1227 switch (mod) { 1228 case 0x0: 1229 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ 1230 switch (rm) { 1231 case 0x4: 1232 case 0xc: /* in case REXW.b is set */ 1233 insn->eaBase = (insn->addressSize == 4 ? 1234 EA_BASE_sib : EA_BASE_sib64); 1235 readSIB(insn); 1236 if (readDisplacement(insn)) 1237 return -1; 1238 break; 1239 case 0x5: 1240 insn->eaBase = EA_BASE_NONE; 1241 insn->eaDisplacement = EA_DISP_32; 1242 if (readDisplacement(insn)) 1243 return -1; 1244 break; 1245 default: 1246 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1247 break; 1248 } 1249 break; 1250 case 0x1: 1251 case 0x2: 1252 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 1253 switch (rm) { 1254 case 0x4: 1255 case 0xc: /* in case REXW.b is set */ 1256 insn->eaBase = EA_BASE_sib; 1257 readSIB(insn); 1258 if (readDisplacement(insn)) 1259 return -1; 1260 break; 1261 default: 1262 insn->eaBase = (EABase)(insn->eaBaseBase + rm); 1263 if (readDisplacement(insn)) 1264 return -1; 1265 break; 1266 } 1267 break; 1268 case 0x3: 1269 insn->eaDisplacement = EA_DISP_NONE; 1270 insn->eaBase = (EABase)(insn->eaRegBase + rm); 1271 break; 1272 } 1273 break; 1274 } /* switch (insn->addressSize) */ 1275 1276 return 0; 1277} 1278 1279#define GENERIC_FIXUP_FUNC(name, base, prefix) \ 1280 static uint8_t name(struct InternalInstruction *insn, \ 1281 OperandType type, \ 1282 uint8_t index, \ 1283 uint8_t *valid) { \ 1284 *valid = 1; \ 1285 switch (type) { \ 1286 default: \ 1287 debug("Unhandled register type"); \ 1288 *valid = 0; \ 1289 return 0; \ 1290 case TYPE_Rv: \ 1291 return base + index; \ 1292 case TYPE_R8: \ 1293 if (insn->rexPrefix && \ 1294 index >= 4 && index <= 7) { \ 1295 return prefix##_SPL + (index - 4); \ 1296 } else { \ 1297 return prefix##_AL + index; \ 1298 } \ 1299 case TYPE_R16: \ 1300 return prefix##_AX + index; \ 1301 case TYPE_R32: \ 1302 return prefix##_EAX + index; \ 1303 case TYPE_R64: \ 1304 return prefix##_RAX + index; \ 1305 case TYPE_XMM512: \ 1306 return prefix##_ZMM0 + index; \ 1307 case TYPE_XMM256: \ 1308 return prefix##_YMM0 + index; \ 1309 case TYPE_XMM128: \ 1310 case TYPE_XMM64: \ 1311 case TYPE_XMM32: \ 1312 case TYPE_XMM: \ 1313 return prefix##_XMM0 + index; \ 1314 case TYPE_MM64: \ 1315 case TYPE_MM32: \ 1316 case TYPE_MM: \ 1317 if (index > 7) \ 1318 *valid = 0; \ 1319 return prefix##_MM0 + index; \ 1320 case TYPE_SEGMENTREG: \ 1321 if (index > 5) \ 1322 *valid = 0; \ 1323 return prefix##_ES + index; \ 1324 case TYPE_DEBUGREG: \ 1325 if (index > 7) \ 1326 *valid = 0; \ 1327 return prefix##_DR0 + index; \ 1328 case TYPE_CONTROLREG: \ 1329 if (index > 8) \ 1330 *valid = 0; \ 1331 return prefix##_CR0 + index; \ 1332 } \ 1333 } 1334 1335/* 1336 * fixup*Value - Consults an operand type to determine the meaning of the 1337 * reg or R/M field. If the operand is an XMM operand, for example, an 1338 * operand would be XMM0 instead of AX, which readModRM() would otherwise 1339 * misinterpret it as. 1340 * 1341 * @param insn - The instruction containing the operand. 1342 * @param type - The operand type. 1343 * @param index - The existing value of the field as reported by readModRM(). 1344 * @param valid - The address of a uint8_t. The target is set to 1 if the 1345 * field is valid for the register class; 0 if not. 1346 * @return - The proper value. 1347 */ 1348GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) 1349GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) 1350 1351/* 1352 * fixupReg - Consults an operand specifier to determine which of the 1353 * fixup*Value functions to use in correcting readModRM()'ss interpretation. 1354 * 1355 * @param insn - See fixup*Value(). 1356 * @param op - The operand specifier. 1357 * @return - 0 if fixup was successful; -1 if the register returned was 1358 * invalid for its class. 1359 */ 1360static int fixupReg(struct InternalInstruction *insn, 1361 const struct OperandSpecifier *op) { 1362 uint8_t valid; 1363 1364 dbgprintf(insn, "fixupReg()"); 1365 1366 switch ((OperandEncoding)op->encoding) { 1367 default: 1368 debug("Expected a REG or R/M encoding in fixupReg"); 1369 return -1; 1370 case ENCODING_VVVV: 1371 insn->vvvv = (Reg)fixupRegValue(insn, 1372 (OperandType)op->type, 1373 insn->vvvv, 1374 &valid); 1375 if (!valid) 1376 return -1; 1377 break; 1378 case ENCODING_REG: 1379 insn->reg = (Reg)fixupRegValue(insn, 1380 (OperandType)op->type, 1381 insn->reg - insn->regBase, 1382 &valid); 1383 if (!valid) 1384 return -1; 1385 break; 1386 case ENCODING_RM: 1387 if (insn->eaBase >= insn->eaRegBase) { 1388 insn->eaBase = (EABase)fixupRMValue(insn, 1389 (OperandType)op->type, 1390 insn->eaBase - insn->eaRegBase, 1391 &valid); 1392 if (!valid) 1393 return -1; 1394 } 1395 break; 1396 } 1397 1398 return 0; 1399} 1400 1401/* 1402 * readOpcodeModifier - Reads an operand from the opcode field of an 1403 * instruction. Handles AddRegFrm instructions. 1404 * 1405 * @param insn - The instruction whose opcode field is to be read. 1406 * @param inModRM - Indicates that the opcode field is to be read from the 1407 * ModR/M extension; useful for escape opcodes 1408 * @return - 0 on success; nonzero otherwise. 1409 */ 1410static int readOpcodeModifier(struct InternalInstruction* insn) { 1411 dbgprintf(insn, "readOpcodeModifier()"); 1412 1413 if (insn->consumedOpcodeModifier) 1414 return 0; 1415 1416 insn->consumedOpcodeModifier = TRUE; 1417 1418 switch (insn->spec->modifierType) { 1419 default: 1420 debug("Unknown modifier type."); 1421 return -1; 1422 case MODIFIER_NONE: 1423 debug("No modifier but an operand expects one."); 1424 return -1; 1425 case MODIFIER_OPCODE: 1426 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; 1427 return 0; 1428 case MODIFIER_MODRM: 1429 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; 1430 return 0; 1431 } 1432} 1433 1434/* 1435 * readOpcodeRegister - Reads an operand from the opcode field of an 1436 * instruction and interprets it appropriately given the operand width. 1437 * Handles AddRegFrm instructions. 1438 * 1439 * @param insn - See readOpcodeModifier(). 1440 * @param size - The width (in bytes) of the register being specified. 1441 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1442 * RAX. 1443 * @return - 0 on success; nonzero otherwise. 1444 */ 1445static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { 1446 dbgprintf(insn, "readOpcodeRegister()"); 1447 1448 if (readOpcodeModifier(insn)) 1449 return -1; 1450 1451 if (size == 0) 1452 size = insn->registerSize; 1453 1454 switch (size) { 1455 case 1: 1456 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) 1457 | insn->opcodeModifier)); 1458 if (insn->rexPrefix && 1459 insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1460 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1461 insn->opcodeRegister = (Reg)(MODRM_REG_SPL 1462 + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1463 } 1464 1465 break; 1466 case 2: 1467 insn->opcodeRegister = (Reg)(MODRM_REG_AX 1468 + ((bFromREX(insn->rexPrefix) << 3) 1469 | insn->opcodeModifier)); 1470 break; 1471 case 4: 1472 insn->opcodeRegister = (Reg)(MODRM_REG_EAX 1473 + ((bFromREX(insn->rexPrefix) << 3) 1474 | insn->opcodeModifier)); 1475 break; 1476 case 8: 1477 insn->opcodeRegister = (Reg)(MODRM_REG_RAX 1478 + ((bFromREX(insn->rexPrefix) << 3) 1479 | insn->opcodeModifier)); 1480 break; 1481 } 1482 1483 return 0; 1484} 1485 1486/* 1487 * readImmediate - Consumes an immediate operand from an instruction, given the 1488 * desired operand size. 1489 * 1490 * @param insn - The instruction whose operand is to be read. 1491 * @param size - The width (in bytes) of the operand. 1492 * @return - 0 if the immediate was successfully consumed; nonzero 1493 * otherwise. 1494 */ 1495static int readImmediate(struct InternalInstruction* insn, uint8_t size) { 1496 uint8_t imm8; 1497 uint16_t imm16; 1498 uint32_t imm32; 1499 uint64_t imm64; 1500 1501 dbgprintf(insn, "readImmediate()"); 1502 1503 if (insn->numImmediatesConsumed == 2) { 1504 debug("Already consumed two immediates"); 1505 return -1; 1506 } 1507 1508 if (size == 0) 1509 size = insn->immediateSize; 1510 else 1511 insn->immediateSize = size; 1512 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1513 1514 switch (size) { 1515 case 1: 1516 if (consumeByte(insn, &imm8)) 1517 return -1; 1518 insn->immediates[insn->numImmediatesConsumed] = imm8; 1519 break; 1520 case 2: 1521 if (consumeUInt16(insn, &imm16)) 1522 return -1; 1523 insn->immediates[insn->numImmediatesConsumed] = imm16; 1524 break; 1525 case 4: 1526 if (consumeUInt32(insn, &imm32)) 1527 return -1; 1528 insn->immediates[insn->numImmediatesConsumed] = imm32; 1529 break; 1530 case 8: 1531 if (consumeUInt64(insn, &imm64)) 1532 return -1; 1533 insn->immediates[insn->numImmediatesConsumed] = imm64; 1534 break; 1535 } 1536 1537 insn->numImmediatesConsumed++; 1538 1539 return 0; 1540} 1541 1542/* 1543 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. 1544 * 1545 * @param insn - The instruction whose operand is to be read. 1546 * @return - 0 if the vvvv was successfully consumed; nonzero 1547 * otherwise. 1548 */ 1549static int readVVVV(struct InternalInstruction* insn) { 1550 dbgprintf(insn, "readVVVV()"); 1551 1552 if (insn->vexXopType == TYPE_VEX_3B) 1553 insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]); 1554 else if (insn->vexXopType == TYPE_VEX_2B) 1555 insn->vvvv = vvvvFromVEX2of2(insn->vexXopPrefix[1]); 1556 else if (insn->vexXopType == TYPE_XOP) 1557 insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]); 1558 else 1559 return -1; 1560 1561 if (insn->mode != MODE_64BIT) 1562 insn->vvvv &= 0x7; 1563 1564 return 0; 1565} 1566 1567/* 1568 * readOperands - Consults the specifier for an instruction and consumes all 1569 * operands for that instruction, interpreting them as it goes. 1570 * 1571 * @param insn - The instruction whose operands are to be read and interpreted. 1572 * @return - 0 if all operands could be read; nonzero otherwise. 1573 */ 1574static int readOperands(struct InternalInstruction* insn) { 1575 int index; 1576 int hasVVVV, needVVVV; 1577 int sawRegImm = 0; 1578 1579 dbgprintf(insn, "readOperands()"); 1580 1581 /* If non-zero vvvv specified, need to make sure one of the operands 1582 uses it. */ 1583 hasVVVV = !readVVVV(insn); 1584 needVVVV = hasVVVV && (insn->vvvv != 0); 1585 1586 for (index = 0; index < X86_MAX_OPERANDS; ++index) { 1587 switch (x86OperandSets[insn->spec->operands][index].encoding) { 1588 case ENCODING_NONE: 1589 break; 1590 case ENCODING_REG: 1591 case ENCODING_RM: 1592 if (readModRM(insn)) 1593 return -1; 1594 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1595 return -1; 1596 break; 1597 case ENCODING_CB: 1598 case ENCODING_CW: 1599 case ENCODING_CD: 1600 case ENCODING_CP: 1601 case ENCODING_CO: 1602 case ENCODING_CT: 1603 dbgprintf(insn, "We currently don't hande code-offset encodings"); 1604 return -1; 1605 case ENCODING_IB: 1606 if (sawRegImm) { 1607 /* Saw a register immediate so don't read again and instead split the 1608 previous immediate. FIXME: This is a hack. */ 1609 insn->immediates[insn->numImmediatesConsumed] = 1610 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1611 ++insn->numImmediatesConsumed; 1612 break; 1613 } 1614 if (readImmediate(insn, 1)) 1615 return -1; 1616 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && 1617 insn->immediates[insn->numImmediatesConsumed - 1] > 7) 1618 return -1; 1619 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && 1620 insn->immediates[insn->numImmediatesConsumed - 1] > 31) 1621 return -1; 1622 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || 1623 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) 1624 sawRegImm = 1; 1625 break; 1626 case ENCODING_IW: 1627 if (readImmediate(insn, 2)) 1628 return -1; 1629 break; 1630 case ENCODING_ID: 1631 if (readImmediate(insn, 4)) 1632 return -1; 1633 break; 1634 case ENCODING_IO: 1635 if (readImmediate(insn, 8)) 1636 return -1; 1637 break; 1638 case ENCODING_Iv: 1639 if (readImmediate(insn, insn->immediateSize)) 1640 return -1; 1641 break; 1642 case ENCODING_Ia: 1643 if (readImmediate(insn, insn->addressSize)) 1644 return -1; 1645 break; 1646 case ENCODING_RB: 1647 if (readOpcodeRegister(insn, 1)) 1648 return -1; 1649 break; 1650 case ENCODING_RW: 1651 if (readOpcodeRegister(insn, 2)) 1652 return -1; 1653 break; 1654 case ENCODING_RD: 1655 if (readOpcodeRegister(insn, 4)) 1656 return -1; 1657 break; 1658 case ENCODING_RO: 1659 if (readOpcodeRegister(insn, 8)) 1660 return -1; 1661 break; 1662 case ENCODING_Rv: 1663 if (readOpcodeRegister(insn, 0)) 1664 return -1; 1665 break; 1666 case ENCODING_I: 1667 if (readOpcodeModifier(insn)) 1668 return -1; 1669 break; 1670 case ENCODING_VVVV: 1671 needVVVV = 0; /* Mark that we have found a VVVV operand. */ 1672 if (!hasVVVV) 1673 return -1; 1674 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) 1675 return -1; 1676 break; 1677 case ENCODING_DUP: 1678 break; 1679 default: 1680 dbgprintf(insn, "Encountered an operand with an unknown encoding."); 1681 return -1; 1682 } 1683 } 1684 1685 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ 1686 if (needVVVV) return -1; 1687 1688 return 0; 1689} 1690 1691/* 1692 * decodeInstruction - Reads and interprets a full instruction provided by the 1693 * user. 1694 * 1695 * @param insn - A pointer to the instruction to be populated. Must be 1696 * pre-allocated. 1697 * @param reader - The function to be used to read the instruction's bytes. 1698 * @param readerArg - A generic argument to be passed to the reader to store 1699 * any internal state. 1700 * @param logger - If non-NULL, the function to be used to write log messages 1701 * and warnings. 1702 * @param loggerArg - A generic argument to be passed to the logger to store 1703 * any internal state. 1704 * @param startLoc - The address (in the reader's address space) of the first 1705 * byte in the instruction. 1706 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to 1707 * decode the instruction in. 1708 * @return - 0 if the instruction's memory could be read; nonzero if 1709 * not. 1710 */ 1711int decodeInstruction(struct InternalInstruction* insn, 1712 byteReader_t reader, 1713 const void* readerArg, 1714 dlog_t logger, 1715 void* loggerArg, 1716 const void* miiArg, 1717 uint64_t startLoc, 1718 DisassemblerMode mode) { 1719 memset(insn, 0, sizeof(struct InternalInstruction)); 1720 1721 insn->reader = reader; 1722 insn->readerArg = readerArg; 1723 insn->dlog = logger; 1724 insn->dlogArg = loggerArg; 1725 insn->startLocation = startLoc; 1726 insn->readerCursor = startLoc; 1727 insn->mode = mode; 1728 insn->numImmediatesConsumed = 0; 1729 1730 if (readPrefixes(insn) || 1731 readOpcode(insn) || 1732 getID(insn, miiArg) || 1733 insn->instructionID == 0 || 1734 readOperands(insn)) 1735 return -1; 1736 1737 insn->operands = &x86OperandSets[insn->spec->operands][0]; 1738 1739 insn->length = insn->readerCursor - insn->startLocation; 1740 1741 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", 1742 startLoc, insn->readerCursor, insn->length); 1743 1744 if (insn->length > 15) 1745 dbgprintf(insn, "Instruction exceeds 15-byte limit"); 1746 1747 return 0; 1748}
|