1/* udis86 - libudis86/decode.c 2 * 3 * Copyright (c) 2002-2009 Vivek Thampi 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, 7 * are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, 10 * this list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "config.h" 28 29#if USE(UDIS86) 30 31#include "udis86_extern.h" 32#include "udis86_types.h" 33#include "udis86_input.h" 34#include "udis86_decode.h" 35#include <wtf/Assertions.h> 36 37#define dbg(x, n...) 38/* #define dbg printf */ 39 40#ifndef __UD_STANDALONE__ 41# include <string.h> 42#endif /* __UD_STANDALONE__ */ 43 44/* The max number of prefixes to an instruction */ 45#define MAX_PREFIXES 15 46 47/* instruction aliases and special cases */ 48static struct ud_itab_entry s_ie__invalid = 49 { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none }; 50 51static int 52decode_ext(struct ud *u, uint16_t ptr); 53 54 55static inline int 56eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) 57{ 58 if (dis_mode == 64) { 59 return rex_w ? 64 : (pfx_opr ? 16 : 32); 60 } else if (dis_mode == 32) { 61 return pfx_opr ? 16 : 32; 62 } else { 63 ASSERT(dis_mode == 16); 64 return pfx_opr ? 32 : 16; 65 } 66} 67 68 69static inline int 70eff_adr_mode(int dis_mode, int pfx_adr) 71{ 72 if (dis_mode == 64) { 73 return pfx_adr ? 32 : 64; 74 } else if (dis_mode == 32) { 75 return pfx_adr ? 16 : 32; 76 } else { 77 ASSERT(dis_mode == 16); 78 return pfx_adr ? 32 : 16; 79 } 80} 81 82 83/* Looks up mnemonic code in the mnemonic string table 84 * Returns NULL if the mnemonic code is invalid 85 */ 86const char * ud_lookup_mnemonic( enum ud_mnemonic_code c ) 87{ 88 return ud_mnemonics_str[ c ]; 89} 90 91 92/* 93 * decode_prefixes 94 * 95 * Extracts instruction prefixes. 96 */ 97static int 98decode_prefixes(struct ud *u) 99{ 100 unsigned int have_pfx = 1; 101 unsigned int i; 102 uint8_t curr; 103 104 /* if in error state, bail out */ 105 if ( u->error ) 106 return -1; 107 108 /* keep going as long as there are prefixes available */ 109 for ( i = 0; have_pfx ; ++i ) { 110 111 /* Get next byte. */ 112 ud_inp_next(u); 113 if ( u->error ) 114 return -1; 115 curr = ud_inp_curr( u ); 116 117 /* rex prefixes in 64bit mode */ 118 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) { 119 u->pfx_rex = curr; 120 } else { 121 switch ( curr ) 122 { 123 case 0x2E : 124 u->pfx_seg = UD_R_CS; 125 u->pfx_rex = 0; 126 break; 127 case 0x36 : 128 u->pfx_seg = UD_R_SS; 129 u->pfx_rex = 0; 130 break; 131 case 0x3E : 132 u->pfx_seg = UD_R_DS; 133 u->pfx_rex = 0; 134 break; 135 case 0x26 : 136 u->pfx_seg = UD_R_ES; 137 u->pfx_rex = 0; 138 break; 139 case 0x64 : 140 u->pfx_seg = UD_R_FS; 141 u->pfx_rex = 0; 142 break; 143 case 0x65 : 144 u->pfx_seg = UD_R_GS; 145 u->pfx_rex = 0; 146 break; 147 case 0x67 : /* adress-size override prefix */ 148 u->pfx_adr = 0x67; 149 u->pfx_rex = 0; 150 break; 151 case 0xF0 : 152 u->pfx_lock = 0xF0; 153 u->pfx_rex = 0; 154 break; 155 case 0x66: 156 /* the 0x66 sse prefix is only effective if no other sse prefix 157 * has already been specified. 158 */ 159 if ( !u->pfx_insn ) u->pfx_insn = 0x66; 160 u->pfx_opr = 0x66; 161 u->pfx_rex = 0; 162 break; 163 case 0xF2: 164 u->pfx_insn = 0xF2; 165 u->pfx_repne = 0xF2; 166 u->pfx_rex = 0; 167 break; 168 case 0xF3: 169 u->pfx_insn = 0xF3; 170 u->pfx_rep = 0xF3; 171 u->pfx_repe = 0xF3; 172 u->pfx_rex = 0; 173 break; 174 default : 175 /* No more prefixes */ 176 have_pfx = 0; 177 break; 178 } 179 } 180 181 /* check if we reached max instruction length */ 182 if ( i + 1 == MAX_INSN_LENGTH ) { 183 u->error = 1; 184 break; 185 } 186 } 187 188 /* return status */ 189 if ( u->error ) 190 return -1; 191 192 /* rewind back one byte in stream, since the above loop 193 * stops with a non-prefix byte. 194 */ 195 ud_inp_back(u); 196 return 0; 197} 198 199 200static inline unsigned int modrm( struct ud * u ) 201{ 202 if ( !u->have_modrm ) { 203 u->modrm = ud_inp_next( u ); 204 u->have_modrm = 1; 205 } 206 return u->modrm; 207} 208 209 210static unsigned int resolve_operand_size( const struct ud * u, unsigned int s ) 211{ 212 switch ( s ) 213 { 214 case SZ_V: 215 return ( u->opr_mode ); 216 case SZ_Z: 217 return ( u->opr_mode == 16 ) ? 16 : 32; 218 case SZ_P: 219 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP; 220 case SZ_MDQ: 221 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; 222 case SZ_RDQ: 223 return ( u->dis_mode == 64 ) ? 64 : 32; 224 default: 225 return s; 226 } 227} 228 229 230static int resolve_mnemonic( struct ud* u ) 231{ 232 /* far/near flags */ 233 u->br_far = 0; 234 u->br_near = 0; 235 /* readjust operand sizes for call/jmp instrcutions */ 236 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) { 237 /* WP: 16:16 pointer */ 238 if ( u->operand[ 0 ].size == SZ_WP ) { 239 u->operand[ 0 ].size = 16; 240 u->br_far = 1; 241 u->br_near= 0; 242 /* DP: 32:32 pointer */ 243 } else if ( u->operand[ 0 ].size == SZ_DP ) { 244 u->operand[ 0 ].size = 32; 245 u->br_far = 1; 246 u->br_near= 0; 247 } else { 248 u->br_far = 0; 249 u->br_near= 1; 250 } 251 /* resolve 3dnow weirdness. */ 252 } else if ( u->mnemonic == UD_I3dnow ) { 253 u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic; 254 } 255 /* SWAPGS is only valid in 64bits mode */ 256 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { 257 u->error = 1; 258 return -1; 259 } 260 261 if (u->mnemonic == UD_Ixchg) { 262 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && 263 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || 264 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && 265 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { 266 u->operand[0].type = UD_NONE; 267 u->operand[1].type = UD_NONE; 268 u->mnemonic = UD_Inop; 269 } 270 } 271 272 if (u->mnemonic == UD_Inop && u->pfx_rep) { 273 u->pfx_rep = 0; 274 u->mnemonic = UD_Ipause; 275 } 276 return 0; 277} 278 279 280/* ----------------------------------------------------------------------------- 281 * decode_a()- Decodes operands of the type seg:offset 282 * ----------------------------------------------------------------------------- 283 */ 284static void 285decode_a(struct ud* u, struct ud_operand *op) 286{ 287 if (u->opr_mode == 16) { 288 /* seg16:off16 */ 289 op->type = UD_OP_PTR; 290 op->size = 32; 291 op->lval.ptr.off = ud_inp_uint16(u); 292 op->lval.ptr.seg = ud_inp_uint16(u); 293 } else { 294 /* seg16:off32 */ 295 op->type = UD_OP_PTR; 296 op->size = 48; 297 op->lval.ptr.off = ud_inp_uint32(u); 298 op->lval.ptr.seg = ud_inp_uint16(u); 299 } 300} 301 302/* ----------------------------------------------------------------------------- 303 * decode_gpr() - Returns decoded General Purpose Register 304 * ----------------------------------------------------------------------------- 305 */ 306static enum ud_type 307decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) 308{ 309 s = resolve_operand_size(u, s); 310 311 switch (s) { 312 case 64: 313 return UD_R_RAX + rm; 314 case SZ_DP: 315 case 32: 316 return UD_R_EAX + rm; 317 case SZ_WP: 318 case 16: 319 return UD_R_AX + rm; 320 case 8: 321 if (u->dis_mode == 64 && u->pfx_rex) { 322 if (rm >= 4) 323 return UD_R_SPL + (rm-4); 324 return UD_R_AL + rm; 325 } else return UD_R_AL + rm; 326 default: 327 return 0; 328 } 329} 330 331/* ----------------------------------------------------------------------------- 332 * resolve_gpr64() - 64bit General Purpose Register-Selection. 333 * ----------------------------------------------------------------------------- 334 */ 335static enum ud_type 336resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size) 337{ 338 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15) 339 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3); 340 else gpr_op = (gpr_op - OP_rAX); 341 342 if (u->opr_mode == 16) { 343 *size = 16; 344 return gpr_op + UD_R_AX; 345 } 346 if (u->dis_mode == 32 || 347 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) { 348 *size = 32; 349 return gpr_op + UD_R_EAX; 350 } 351 352 *size = 64; 353 return gpr_op + UD_R_RAX; 354} 355 356/* ----------------------------------------------------------------------------- 357 * resolve_gpr32 () - 32bit General Purpose Register-Selection. 358 * ----------------------------------------------------------------------------- 359 */ 360static enum ud_type 361resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op) 362{ 363 gpr_op = gpr_op - OP_eAX; 364 365 if (u->opr_mode == 16) 366 return gpr_op + UD_R_AX; 367 368 return gpr_op + UD_R_EAX; 369} 370 371/* ----------------------------------------------------------------------------- 372 * resolve_reg() - Resolves the register type 373 * ----------------------------------------------------------------------------- 374 */ 375static enum ud_type 376resolve_reg(struct ud* u, unsigned int type, unsigned char i) 377{ 378 switch (type) { 379 case T_MMX : return UD_R_MM0 + (i & 7); 380 case T_XMM : return UD_R_XMM0 + i; 381 case T_CRG : return UD_R_CR0 + i; 382 case T_DBG : return UD_R_DR0 + i; 383 case T_SEG : { 384 /* 385 * Only 6 segment registers, anything else is an error. 386 */ 387 if ((i & 7) > 5) { 388 u->error = 1; 389 } else { 390 return UD_R_ES + (i & 7); 391 } 392 } 393 case T_NONE: 394 default: return UD_NONE; 395 } 396} 397 398/* ----------------------------------------------------------------------------- 399 * decode_imm() - Decodes Immediate values. 400 * ----------------------------------------------------------------------------- 401 */ 402static void 403decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) 404{ 405 op->size = resolve_operand_size(u, s); 406 op->type = UD_OP_IMM; 407 408 switch (op->size) { 409 case 8: op->lval.sbyte = ud_inp_uint8(u); break; 410 case 16: op->lval.uword = ud_inp_uint16(u); break; 411 case 32: op->lval.udword = ud_inp_uint32(u); break; 412 case 64: op->lval.uqword = ud_inp_uint64(u); break; 413 default: return; 414 } 415} 416 417 418/* 419 * decode_modrm_reg 420 * 421 * Decodes reg field of mod/rm byte 422 * 423 */ 424static void 425decode_modrm_reg(struct ud *u, 426 struct ud_operand *operand, 427 unsigned int type, 428 unsigned int size) 429{ 430 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); 431 operand->type = UD_OP_REG; 432 operand->size = resolve_operand_size(u, size); 433 434 if (type == T_GPR) { 435 operand->base = decode_gpr(u, operand->size, reg); 436 } else { 437 operand->base = resolve_reg(u, type, reg); 438 } 439} 440 441 442/* 443 * decode_modrm_rm 444 * 445 * Decodes rm field of mod/rm byte 446 * 447 */ 448static void 449decode_modrm_rm(struct ud *u, 450 struct ud_operand *op, 451 unsigned char type, 452 unsigned int size) 453 454{ 455 unsigned char mod, rm, reg; 456 457 /* get mod, r/m and reg fields */ 458 mod = MODRM_MOD(modrm(u)); 459 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u)); 460 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); 461 462 op->size = resolve_operand_size(u, size); 463 464 /* 465 * If mod is 11b, then the modrm.rm specifies a register. 466 * 467 */ 468 if (mod == 3) { 469 op->type = UD_OP_REG; 470 if (type == T_GPR) { 471 op->base = decode_gpr(u, op->size, rm); 472 } else { 473 op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7)); 474 } 475 return; 476 } 477 478 479 /* 480 * !11 => Memory Address 481 */ 482 op->type = UD_OP_MEM; 483 484 if (u->adr_mode == 64) { 485 op->base = UD_R_RAX + rm; 486 if (mod == 1) { 487 op->offset = 8; 488 } else if (mod == 2) { 489 op->offset = 32; 490 } else if (mod == 0 && (rm & 7) == 5) { 491 op->base = UD_R_RIP; 492 op->offset = 32; 493 } else { 494 op->offset = 0; 495 } 496 /* 497 * Scale-Index-Base (SIB) 498 */ 499 if ((rm & 7) == 4) { 500 ud_inp_next(u); 501 502 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; 503 op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 504 op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 505 506 /* special conditions for base reference */ 507 if (op->index == UD_R_RSP) { 508 op->index = UD_NONE; 509 op->scale = UD_NONE; 510 } 511 512 if (op->base == UD_R_RBP || op->base == UD_R_R13) { 513 if (mod == 0) { 514 op->base = UD_NONE; 515 } 516 if (mod == 1) { 517 op->offset = 8; 518 } else { 519 op->offset = 32; 520 } 521 } 522 } 523 } else if (u->adr_mode == 32) { 524 op->base = UD_R_EAX + rm; 525 if (mod == 1) { 526 op->offset = 8; 527 } else if (mod == 2) { 528 op->offset = 32; 529 } else if (mod == 0 && rm == 5) { 530 op->base = UD_NONE; 531 op->offset = 32; 532 } else { 533 op->offset = 0; 534 } 535 536 /* Scale-Index-Base (SIB) */ 537 if ((rm & 7) == 4) { 538 ud_inp_next(u); 539 540 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; 541 op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); 542 op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); 543 544 if (op->index == UD_R_ESP) { 545 op->index = UD_NONE; 546 op->scale = UD_NONE; 547 } 548 549 /* special condition for base reference */ 550 if (op->base == UD_R_EBP) { 551 if (mod == 0) { 552 op->base = UD_NONE; 553 } 554 if (mod == 1) { 555 op->offset = 8; 556 } else { 557 op->offset = 32; 558 } 559 } 560 } 561 } else { 562 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, 563 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; 564 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, 565 UD_NONE, UD_NONE, UD_NONE, UD_NONE }; 566 op->base = bases[rm & 7]; 567 op->index = indices[rm & 7]; 568 if (mod == 0 && rm == 6) { 569 op->offset= 16; 570 op->base = UD_NONE; 571 } else if (mod == 1) { 572 op->offset = 8; 573 } else if (mod == 2) { 574 op->offset = 16; 575 } 576 } 577 578 /* 579 * extract offset, if any 580 */ 581 switch (op->offset) { 582 case 8 : op->lval.ubyte = ud_inp_uint8(u); break; 583 case 16: op->lval.uword = ud_inp_uint16(u); break; 584 case 32: op->lval.udword = ud_inp_uint32(u); break; 585 case 64: op->lval.uqword = ud_inp_uint64(u); break; 586 default: break; 587 } 588} 589 590/* ----------------------------------------------------------------------------- 591 * decode_o() - Decodes offset 592 * ----------------------------------------------------------------------------- 593 */ 594static void 595decode_o(struct ud* u, unsigned int s, struct ud_operand *op) 596{ 597 switch (u->adr_mode) { 598 case 64: 599 op->offset = 64; 600 op->lval.uqword = ud_inp_uint64(u); 601 break; 602 case 32: 603 op->offset = 32; 604 op->lval.udword = ud_inp_uint32(u); 605 break; 606 case 16: 607 op->offset = 16; 608 op->lval.uword = ud_inp_uint16(u); 609 break; 610 default: 611 return; 612 } 613 op->type = UD_OP_MEM; 614 op->size = resolve_operand_size(u, s); 615} 616 617/* ----------------------------------------------------------------------------- 618 * decode_operands() - Disassembles Operands. 619 * ----------------------------------------------------------------------------- 620 */ 621static int 622decode_operand(struct ud *u, 623 struct ud_operand *operand, 624 enum ud_operand_code type, 625 unsigned int size) 626{ 627 switch (type) { 628 case OP_A : 629 decode_a(u, operand); 630 break; 631 case OP_MR: 632 if (MODRM_MOD(modrm(u)) == 3) { 633 decode_modrm_rm(u, operand, T_GPR, 634 size == SZ_DY ? SZ_MDQ : SZ_V); 635 } else if (size == SZ_WV) { 636 decode_modrm_rm( u, operand, T_GPR, SZ_W); 637 } else if (size == SZ_BV) { 638 decode_modrm_rm( u, operand, T_GPR, SZ_B); 639 } else if (size == SZ_DY) { 640 decode_modrm_rm( u, operand, T_GPR, SZ_D); 641 } else { 642 ASSERT(!"unexpected size"); 643 } 644 break; 645 case OP_M: 646 if (MODRM_MOD(modrm(u)) == 3) { 647 u->error = 1; 648 } 649 /* intended fall through */ 650 case OP_E: 651 decode_modrm_rm(u, operand, T_GPR, size); 652 break; 653 break; 654 case OP_G: 655 decode_modrm_reg(u, operand, T_GPR, size); 656 break; 657 case OP_I: 658 decode_imm(u, size, operand); 659 break; 660 case OP_I1: 661 operand->type = UD_OP_CONST; 662 operand->lval.udword = 1; 663 break; 664 case OP_PR: 665 if (MODRM_MOD(modrm(u)) != 3) { 666 u->error = 1; 667 } 668 decode_modrm_rm(u, operand, T_MMX, size); 669 break; 670 case OP_P: 671 decode_modrm_reg(u, operand, T_MMX, size); 672 break; 673 case OP_VR: 674 if (MODRM_MOD(modrm(u)) != 3) { 675 u->error = 1; 676 } 677 /* intended fall through */ 678 case OP_W: 679 decode_modrm_rm(u, operand, T_XMM, size); 680 break; 681 case OP_V: 682 decode_modrm_reg(u, operand, T_XMM, size); 683 break; 684 case OP_S: 685 decode_modrm_reg(u, operand, T_SEG, size); 686 break; 687 case OP_AL: 688 case OP_CL: 689 case OP_DL: 690 case OP_BL: 691 case OP_AH: 692 case OP_CH: 693 case OP_DH: 694 case OP_BH: 695 operand->type = UD_OP_REG; 696 operand->base = UD_R_AL + (type - OP_AL); 697 operand->size = 8; 698 break; 699 case OP_DX: 700 operand->type = UD_OP_REG; 701 operand->base = UD_R_DX; 702 operand->size = 16; 703 break; 704 case OP_O: 705 decode_o(u, size, operand); 706 break; 707 case OP_rAXr8: 708 case OP_rCXr9: 709 case OP_rDXr10: 710 case OP_rBXr11: 711 case OP_rSPr12: 712 case OP_rBPr13: 713 case OP_rSIr14: 714 case OP_rDIr15: 715 case OP_rAX: 716 case OP_rCX: 717 case OP_rDX: 718 case OP_rBX: 719 case OP_rSP: 720 case OP_rBP: 721 case OP_rSI: 722 case OP_rDI: 723 operand->type = UD_OP_REG; 724 operand->base = resolve_gpr64(u, type, &operand->size); 725 break; 726 case OP_ALr8b: 727 case OP_CLr9b: 728 case OP_DLr10b: 729 case OP_BLr11b: 730 case OP_AHr12b: 731 case OP_CHr13b: 732 case OP_DHr14b: 733 case OP_BHr15b: { 734 ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL 735 + (REX_B(u->pfx_rex) << 3); 736 if (UD_R_AH <= gpr && u->pfx_rex) { 737 gpr = gpr + 4; 738 } 739 operand->type = UD_OP_REG; 740 operand->base = gpr; 741 break; 742 } 743 case OP_eAX: 744 case OP_eCX: 745 case OP_eDX: 746 case OP_eBX: 747 case OP_eSP: 748 case OP_eBP: 749 case OP_eSI: 750 case OP_eDI: 751 operand->type = UD_OP_REG; 752 operand->base = resolve_gpr32(u, type); 753 operand->size = u->opr_mode == 16 ? 16 : 32; 754 break; 755 case OP_ES: 756 case OP_CS: 757 case OP_DS: 758 case OP_SS: 759 case OP_FS: 760 case OP_GS: 761 /* in 64bits mode, only fs and gs are allowed */ 762 if (u->dis_mode == 64) { 763 if (type != OP_FS && type != OP_GS) { 764 u->error= 1; 765 } 766 } 767 operand->type = UD_OP_REG; 768 operand->base = (type - OP_ES) + UD_R_ES; 769 operand->size = 16; 770 break; 771 case OP_J : 772 decode_imm(u, size, operand); 773 operand->type = UD_OP_JIMM; 774 break ; 775 case OP_Q: 776 decode_modrm_rm(u, operand, T_MMX, size); 777 break; 778 case OP_R : 779 decode_modrm_rm(u, operand, T_GPR, size); 780 break; 781 case OP_C: 782 decode_modrm_reg(u, operand, T_CRG, size); 783 break; 784 case OP_D: 785 decode_modrm_reg(u, operand, T_DBG, size); 786 break; 787 case OP_I3 : 788 operand->type = UD_OP_CONST; 789 operand->lval.sbyte = 3; 790 break; 791 case OP_ST0: 792 case OP_ST1: 793 case OP_ST2: 794 case OP_ST3: 795 case OP_ST4: 796 case OP_ST5: 797 case OP_ST6: 798 case OP_ST7: 799 operand->type = UD_OP_REG; 800 operand->base = (type - OP_ST0) + UD_R_ST0; 801 operand->size = 0; 802 break; 803 case OP_AX: 804 operand->type = UD_OP_REG; 805 operand->base = UD_R_AX; 806 operand->size = 16; 807 break; 808 default : 809 operand->type = UD_NONE; 810 break; 811 } 812 return 0; 813} 814 815 816/* 817 * decode_operands 818 * 819 * Disassemble upto 3 operands of the current instruction being 820 * disassembled. By the end of the function, the operand fields 821 * of the ud structure will have been filled. 822 */ 823static int 824decode_operands(struct ud* u) 825{ 826 decode_operand(u, &u->operand[0], 827 u->itab_entry->operand1.type, 828 u->itab_entry->operand1.size); 829 decode_operand(u, &u->operand[1], 830 u->itab_entry->operand2.type, 831 u->itab_entry->operand2.size); 832 decode_operand(u, &u->operand[2], 833 u->itab_entry->operand3.type, 834 u->itab_entry->operand3.size); 835 return 0; 836} 837 838/* ----------------------------------------------------------------------------- 839 * clear_insn() - clear instruction structure 840 * ----------------------------------------------------------------------------- 841 */ 842static void 843clear_insn(register struct ud* u) 844{ 845 u->error = 0; 846 u->pfx_seg = 0; 847 u->pfx_opr = 0; 848 u->pfx_adr = 0; 849 u->pfx_lock = 0; 850 u->pfx_repne = 0; 851 u->pfx_rep = 0; 852 u->pfx_repe = 0; 853 u->pfx_rex = 0; 854 u->pfx_insn = 0; 855 u->mnemonic = UD_Inone; 856 u->itab_entry = NULL; 857 u->have_modrm = 0; 858 859 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); 860 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); 861 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); 862} 863 864static int 865resolve_mode( struct ud* u ) 866{ 867 /* if in error state, bail out */ 868 if ( u->error ) return -1; 869 870 /* propagate prefix effects */ 871 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ 872 873 /* Check validity of instruction m64 */ 874 if ( P_INV64( u->itab_entry->prefix ) ) { 875 u->error = 1; 876 return -1; 877 } 878 879 /* effective rex prefix is the effective mask for the 880 * instruction hard-coded in the opcode map. 881 */ 882 u->pfx_rex = ( u->pfx_rex & 0x40 ) | 883 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); 884 885 /* whether this instruction has a default operand size of 886 * 64bit, also hardcoded into the opcode map. 887 */ 888 u->default64 = P_DEF64( u->itab_entry->prefix ); 889 /* calculate effective operand size */ 890 if ( REX_W( u->pfx_rex ) ) { 891 u->opr_mode = 64; 892 } else if ( u->pfx_opr ) { 893 u->opr_mode = 16; 894 } else { 895 /* unless the default opr size of instruction is 64, 896 * the effective operand size in the absence of rex.w 897 * prefix is 32. 898 */ 899 u->opr_mode = ( u->default64 ) ? 64 : 32; 900 } 901 902 /* calculate effective address size */ 903 u->adr_mode = (u->pfx_adr) ? 32 : 64; 904 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ 905 u->opr_mode = ( u->pfx_opr ) ? 16 : 32; 906 u->adr_mode = ( u->pfx_adr ) ? 16 : 32; 907 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ 908 u->opr_mode = ( u->pfx_opr ) ? 32 : 16; 909 u->adr_mode = ( u->pfx_adr ) ? 32 : 16; 910 } 911 912 /* These flags determine which operand to apply the operand size 913 * cast to. 914 */ 915 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0; 916 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0; 917 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0; 918 919 /* set flags for implicit addressing */ 920 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); 921 922 return 0; 923} 924 925static int gen_hex( struct ud *u ) 926{ 927 unsigned int i; 928 unsigned char *src_ptr = ud_inp_sess( u ); 929 char* src_hex; 930 931 /* bail out if in error stat. */ 932 if ( u->error ) return -1; 933 /* output buffer pointe */ 934 src_hex = ( char* ) u->insn_hexcode; 935 /* for each byte used to decode instruction */ 936 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) { 937 sprintf( src_hex, "%02x", *src_ptr & 0xFF ); 938 src_hex += 2; 939 } 940 return 0; 941} 942 943 944static inline int 945decode_insn(struct ud *u, uint16_t ptr) 946{ 947 ASSERT((ptr & 0x8000) == 0); 948 u->itab_entry = &ud_itab[ ptr ]; 949 u->mnemonic = u->itab_entry->mnemonic; 950 return (resolve_mode(u) == 0 && 951 decode_operands(u) == 0 && 952 resolve_mnemonic(u) == 0) ? 0 : -1; 953} 954 955 956/* 957 * decode_3dnow() 958 * 959 * Decoding 3dnow is a little tricky because of its strange opcode 960 * structure. The final opcode disambiguation depends on the last 961 * byte that comes after the operands have been decoded. Fortunately, 962 * all 3dnow instructions have the same set of operand types. So we 963 * go ahead and decode the instruction by picking an arbitrarily chosen 964 * valid entry in the table, decode the operands, and read the final 965 * byte to resolve the menmonic. 966 */ 967static inline int 968decode_3dnow(struct ud* u) 969{ 970 uint16_t ptr; 971 ASSERT(u->le->type == UD_TAB__OPC_3DNOW); 972 ASSERT(u->le->table[0xc] != 0); 973 decode_insn(u, u->le->table[0xc]); 974 ud_inp_next(u); 975 if (u->error) { 976 return -1; 977 } 978 ptr = u->le->table[ud_inp_curr(u)]; 979 ASSERT((ptr & 0x8000) == 0); 980 u->mnemonic = ud_itab[ptr].mnemonic; 981 return 0; 982} 983 984 985static int 986decode_ssepfx(struct ud *u) 987{ 988 uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2; 989 if (u->le->table[idx] == 0) { 990 idx = 0; 991 } 992 if (idx && u->le->table[idx] != 0) { 993 /* 994 * "Consume" the prefix as a part of the opcode, so it is no 995 * longer exported as an instruction prefix. 996 */ 997 switch (u->pfx_insn) { 998 case 0xf2: 999 u->pfx_repne = 0; 1000 break; 1001 case 0xf3: 1002 u->pfx_rep = 0; 1003 u->pfx_repe = 0; 1004 break; 1005 case 0x66: 1006 u->pfx_opr = 0; 1007 break; 1008 } 1009 } 1010 return decode_ext(u, u->le->table[idx]); 1011} 1012 1013 1014/* 1015 * decode_ext() 1016 * 1017 * Decode opcode extensions (if any) 1018 */ 1019static int 1020decode_ext(struct ud *u, uint16_t ptr) 1021{ 1022 uint8_t idx = 0; 1023 if ((ptr & 0x8000) == 0) { 1024 return decode_insn(u, ptr); 1025 } 1026 u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; 1027 if (u->le->type == UD_TAB__OPC_3DNOW) { 1028 return decode_3dnow(u); 1029 } 1030 1031 switch (u->le->type) { 1032 case UD_TAB__OPC_MOD: 1033 /* !11 = 0, 11 = 1 */ 1034 idx = (MODRM_MOD(modrm(u)) + 1) / 4; 1035 break; 1036 /* disassembly mode/operand size/address size based tables. 1037 * 16 = 0,, 32 = 1, 64 = 2 1038 */ 1039 case UD_TAB__OPC_MODE: 1040 idx = u->dis_mode / 32; 1041 break; 1042 case UD_TAB__OPC_OSIZE: 1043 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; 1044 break; 1045 case UD_TAB__OPC_ASIZE: 1046 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; 1047 break; 1048 case UD_TAB__OPC_X87: 1049 idx = modrm(u) - 0xC0; 1050 break; 1051 case UD_TAB__OPC_VENDOR: 1052 if (u->vendor == UD_VENDOR_ANY) { 1053 /* choose a valid entry */ 1054 idx = (u->le->table[idx] != 0) ? 0 : 1; 1055 } else if (u->vendor == UD_VENDOR_AMD) { 1056 idx = 0; 1057 } else { 1058 idx = 1; 1059 } 1060 break; 1061 case UD_TAB__OPC_RM: 1062 idx = MODRM_RM(modrm(u)); 1063 break; 1064 case UD_TAB__OPC_REG: 1065 idx = MODRM_REG(modrm(u)); 1066 break; 1067 case UD_TAB__OPC_SSE: 1068 return decode_ssepfx(u); 1069 default: 1070 ASSERT(!"not reached"); 1071 break; 1072 } 1073 1074 return decode_ext(u, u->le->table[idx]); 1075} 1076 1077 1078static inline int 1079decode_opcode(struct ud *u) 1080{ 1081 uint16_t ptr; 1082 ASSERT(u->le->type == UD_TAB__OPC_TABLE); 1083 ud_inp_next(u); 1084 if (u->error) { 1085 return -1; 1086 } 1087 ptr = u->le->table[ud_inp_curr(u)]; 1088 if (ptr & 0x8000) { 1089 u->le = &ud_lookup_table_list[ptr & ~0x8000]; 1090 if (u->le->type == UD_TAB__OPC_TABLE) { 1091 return decode_opcode(u); 1092 } 1093 } 1094 return decode_ext(u, ptr); 1095} 1096 1097 1098/* ============================================================================= 1099 * ud_decode() - Instruction decoder. Returns the number of bytes decoded. 1100 * ============================================================================= 1101 */ 1102unsigned int 1103ud_decode(struct ud *u) 1104{ 1105 ud_inp_start(u); 1106 clear_insn(u); 1107 u->le = &ud_lookup_table_list[0]; 1108 u->error = decode_prefixes(u) == -1 || 1109 decode_opcode(u) == -1 || 1110 u->error; 1111 /* Handle decode error. */ 1112 if (u->error) { 1113 /* clear out the decode data. */ 1114 clear_insn(u); 1115 /* mark the sequence of bytes as invalid. */ 1116 u->itab_entry = & s_ie__invalid; 1117 u->mnemonic = u->itab_entry->mnemonic; 1118 } 1119 1120 /* maybe this stray segment override byte 1121 * should be spewed out? 1122 */ 1123 if ( !P_SEG( u->itab_entry->prefix ) && 1124 u->operand[0].type != UD_OP_MEM && 1125 u->operand[1].type != UD_OP_MEM ) 1126 u->pfx_seg = 0; 1127 1128 u->insn_offset = u->pc; /* set offset of instruction */ 1129 u->insn_fill = 0; /* set translation buffer index to 0 */ 1130 u->pc += u->inp_ctr; /* move program counter by bytes decoded */ 1131 gen_hex( u ); /* generate hex code */ 1132 1133 /* return number of bytes disassembled. */ 1134 return u->inp_ctr; 1135} 1136 1137/* 1138vim: set ts=2 sw=2 expandtab 1139*/ 1140 1141#endif // USE(UDIS86) 1142