1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 *     * Redistributions of source code must retain the above copyright notice,
10 *       this list of conditions and the following disclaimer.
11 *     * Redistributions in binary form must reproduce the above copyright notice,
12 *       this list of conditions and the following disclaimer in the documentation
13 *       and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28
29#if USE(UDIS86)
30
31#include "udis86_extern.h"
32#include "udis86_types.h"
33#include "udis86_input.h"
34#include "udis86_decode.h"
35#include <wtf/Assertions.h>
36
37#define dbg(x, n...)
38/* #define dbg printf */
39
40#ifndef __UD_STANDALONE__
41# include <string.h>
42#endif /* __UD_STANDALONE__ */
43
44/* The max number of prefixes to an instruction */
45#define MAX_PREFIXES    15
46
/* Table entry used for byte sequences that fail to decode: it carries the
 * UD_Iinvalid mnemonic, O_NONE for all three operands and P_none as its
 * prefix field. ud_decode() points u->itab_entry at it on a decode error.
 */
static struct ud_itab_entry s_ie__invalid =
    { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
50
/* Forward declaration: decode_ssepfx() calls decode_ext() before the
 * definition of decode_ext() appears in this file.
 */
static int
decode_ext(struct ud *u, uint16_t ptr);
53
54
/*
 * eff_opr_mode
 *
 *    Returns the effective operand size (16, 32 or 64) for the given
 *    disassembly mode. In 64-bit mode a non-zero rex_w selects 64 and
 *    otherwise the operand-size prefix selects 16 over the default 32.
 *    In 32-bit mode the prefix selects 16 over 32; in 16-bit mode it
 *    selects 32 over 16. Any mode other than 64 or 32 is asserted to be 16.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
    case 64:
      if (rex_w) {
        return 64;
      }
      return pfx_opr ? 16 : 32;
    case 32:
      return pfx_opr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_opr ? 32 : 16;
  }
}
67
68
/*
 * eff_adr_mode
 *
 *    Returns the effective address size (16, 32 or 64) for the given
 *    disassembly mode. A non-zero address-size prefix switches 64 -> 32,
 *    32 -> 16 and 16 -> 32. Any mode other than 64 or 32 is asserted
 *    to be 16.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
    case 64:
      return pfx_adr ? 32 : 64;
    case 32:
      return pfx_adr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_adr ? 32 : 16;
  }
}
81
82
83/* Looks up mnemonic code in the mnemonic string table
84 * Returns NULL if the mnemonic code is invalid
85 */
86const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
87{
88    return ud_mnemonics_str[ c ];
89}
90
91
92/*
93 * decode_prefixes
94 *
95 *  Extracts instruction prefixes.
96 */
97static int
98decode_prefixes(struct ud *u)
99{
100    unsigned int have_pfx = 1;
101    unsigned int i;
102    uint8_t curr;
103
104    /* if in error state, bail out */
105    if ( u->error )
106        return -1;
107
108    /* keep going as long as there are prefixes available */
109    for ( i = 0; have_pfx ; ++i ) {
110
111        /* Get next byte. */
112        ud_inp_next(u);
113        if ( u->error )
114            return -1;
115        curr = ud_inp_curr( u );
116
117        /* rex prefixes in 64bit mode */
118        if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
119            u->pfx_rex = curr;
120        } else {
121            switch ( curr )
122            {
123            case 0x2E :
124                u->pfx_seg = UD_R_CS;
125                u->pfx_rex = 0;
126                break;
127            case 0x36 :
128                u->pfx_seg = UD_R_SS;
129                u->pfx_rex = 0;
130                break;
131            case 0x3E :
132                u->pfx_seg = UD_R_DS;
133                u->pfx_rex = 0;
134                break;
135            case 0x26 :
136                u->pfx_seg = UD_R_ES;
137                u->pfx_rex = 0;
138                break;
139            case 0x64 :
140                u->pfx_seg = UD_R_FS;
141                u->pfx_rex = 0;
142                break;
143            case 0x65 :
144                u->pfx_seg = UD_R_GS;
145                u->pfx_rex = 0;
146                break;
147            case 0x67 : /* adress-size override prefix */
148                u->pfx_adr = 0x67;
149                u->pfx_rex = 0;
150                break;
151            case 0xF0 :
152                u->pfx_lock = 0xF0;
153                u->pfx_rex  = 0;
154                break;
155            case 0x66:
156                /* the 0x66 sse prefix is only effective if no other sse prefix
157                 * has already been specified.
158                 */
159                if ( !u->pfx_insn ) u->pfx_insn = 0x66;
160                u->pfx_opr = 0x66;
161                u->pfx_rex = 0;
162                break;
163            case 0xF2:
164                u->pfx_insn  = 0xF2;
165                u->pfx_repne = 0xF2;
166                u->pfx_rex   = 0;
167                break;
168            case 0xF3:
169                u->pfx_insn = 0xF3;
170                u->pfx_rep  = 0xF3;
171                u->pfx_repe = 0xF3;
172                u->pfx_rex  = 0;
173                break;
174            default :
175                /* No more prefixes */
176                have_pfx = 0;
177                break;
178            }
179        }
180
181        /* check if we reached max instruction length */
182        if ( i + 1 == MAX_INSN_LENGTH ) {
183            u->error = 1;
184            break;
185        }
186    }
187
188    /* return status */
189    if ( u->error )
190        return -1;
191
192    /* rewind back one byte in stream, since the above loop
193     * stops with a non-prefix byte.
194     */
195    ud_inp_back(u);
196    return 0;
197}
198
199
200static inline unsigned int modrm( struct ud * u )
201{
202    if ( !u->have_modrm ) {
203        u->modrm = ud_inp_next( u );
204        u->have_modrm = 1;
205    }
206    return u->modrm;
207}
208
209
210static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
211{
212    switch ( s )
213    {
214    case SZ_V:
215        return ( u->opr_mode );
216    case SZ_Z:
217        return ( u->opr_mode == 16 ) ? 16 : 32;
218    case SZ_P:
219        return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
220    case SZ_MDQ:
221        return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
222    case SZ_RDQ:
223        return ( u->dis_mode == 64 ) ? 64 : 32;
224    default:
225        return s;
226    }
227}
228
229
230static int resolve_mnemonic( struct ud* u )
231{
232  /* far/near flags */
233  u->br_far = 0;
234  u->br_near = 0;
235  /* readjust operand sizes for call/jmp instrcutions */
236  if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
237    /* WP: 16:16 pointer */
238    if ( u->operand[ 0 ].size == SZ_WP ) {
239        u->operand[ 0 ].size = 16;
240        u->br_far = 1;
241        u->br_near= 0;
242    /* DP: 32:32 pointer */
243    } else if ( u->operand[ 0 ].size == SZ_DP ) {
244        u->operand[ 0 ].size = 32;
245        u->br_far = 1;
246        u->br_near= 0;
247    } else {
248        u->br_far = 0;
249        u->br_near= 1;
250    }
251  /* resolve 3dnow weirdness. */
252  } else if ( u->mnemonic == UD_I3dnow ) {
253    u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u )  ] ].mnemonic;
254  }
255  /* SWAPGS is only valid in 64bits mode */
256  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
257    u->error = 1;
258    return -1;
259  }
260
261  if (u->mnemonic == UD_Ixchg) {
262    if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
263         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
264        (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
265         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
266      u->operand[0].type = UD_NONE;
267      u->operand[1].type = UD_NONE;
268      u->mnemonic = UD_Inop;
269    }
270  }
271
272  if (u->mnemonic == UD_Inop && u->pfx_rep) {
273    u->pfx_rep = 0;
274    u->mnemonic = UD_Ipause;
275  }
276  return 0;
277}
278
279
280/* -----------------------------------------------------------------------------
281 * decode_a()- Decodes operands of the type seg:offset
282 * -----------------------------------------------------------------------------
283 */
284static void
285decode_a(struct ud* u, struct ud_operand *op)
286{
287  if (u->opr_mode == 16) {
288    /* seg16:off16 */
289    op->type = UD_OP_PTR;
290    op->size = 32;
291    op->lval.ptr.off = ud_inp_uint16(u);
292    op->lval.ptr.seg = ud_inp_uint16(u);
293  } else {
294    /* seg16:off32 */
295    op->type = UD_OP_PTR;
296    op->size = 48;
297    op->lval.ptr.off = ud_inp_uint32(u);
298    op->lval.ptr.seg = ud_inp_uint16(u);
299  }
300}
301
302/* -----------------------------------------------------------------------------
303 * decode_gpr() - Returns decoded General Purpose Register
304 * -----------------------------------------------------------------------------
305 */
306static enum ud_type
307decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
308{
309  s = resolve_operand_size(u, s);
310
311  switch (s) {
312    case 64:
313        return UD_R_RAX + rm;
314    case SZ_DP:
315    case 32:
316        return UD_R_EAX + rm;
317    case SZ_WP:
318    case 16:
319        return UD_R_AX  + rm;
320    case  8:
321        if (u->dis_mode == 64 && u->pfx_rex) {
322            if (rm >= 4)
323                return UD_R_SPL + (rm-4);
324            return UD_R_AL + rm;
325        } else return UD_R_AL + rm;
326    default:
327        return 0;
328  }
329}
330
331/* -----------------------------------------------------------------------------
332 * resolve_gpr64() - 64bit General Purpose Register-Selection.
333 * -----------------------------------------------------------------------------
334 */
335static enum ud_type
336resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size)
337{
338  if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
339    gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
340  else  gpr_op = (gpr_op - OP_rAX);
341
342  if (u->opr_mode == 16) {
343    *size = 16;
344    return gpr_op + UD_R_AX;
345  }
346  if (u->dis_mode == 32 ||
347    (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
348    *size = 32;
349    return gpr_op + UD_R_EAX;
350  }
351
352  *size = 64;
353  return gpr_op + UD_R_RAX;
354}
355
356/* -----------------------------------------------------------------------------
357 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
358 * -----------------------------------------------------------------------------
359 */
360static enum ud_type
361resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
362{
363  gpr_op = gpr_op - OP_eAX;
364
365  if (u->opr_mode == 16)
366    return gpr_op + UD_R_AX;
367
368  return gpr_op +  UD_R_EAX;
369}
370
371/* -----------------------------------------------------------------------------
372 * resolve_reg() - Resolves the register type
373 * -----------------------------------------------------------------------------
374 */
375static enum ud_type
376resolve_reg(struct ud* u, unsigned int type, unsigned char i)
377{
378  switch (type) {
379    case T_MMX :    return UD_R_MM0  + (i & 7);
380    case T_XMM :    return UD_R_XMM0 + i;
381    case T_CRG :    return UD_R_CR0  + i;
382    case T_DBG :    return UD_R_DR0  + i;
383    case T_SEG : {
384      /*
385       * Only 6 segment registers, anything else is an error.
386       */
387      if ((i & 7) > 5) {
388        u->error = 1;
389      } else {
390        return UD_R_ES + (i & 7);
391      }
392    }
393    case T_NONE:
394    default:    return UD_NONE;
395  }
396}
397
398/* -----------------------------------------------------------------------------
399 * decode_imm() - Decodes Immediate values.
400 * -----------------------------------------------------------------------------
401 */
402static void
403decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
404{
405  op->size = resolve_operand_size(u, s);
406  op->type = UD_OP_IMM;
407
408  switch (op->size) {
409    case  8: op->lval.sbyte = ud_inp_uint8(u);   break;
410    case 16: op->lval.uword = ud_inp_uint16(u);  break;
411    case 32: op->lval.udword = ud_inp_uint32(u); break;
412    case 64: op->lval.uqword = ud_inp_uint64(u); break;
413    default: return;
414  }
415}
416
417
418/*
419 * decode_modrm_reg
420 *
421 *    Decodes reg field of mod/rm byte
422 *
423 */
424static void
425decode_modrm_reg(struct ud         *u,
426                 struct ud_operand *operand,
427                 unsigned int       type,
428                 unsigned int       size)
429{
430  uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
431  operand->type = UD_OP_REG;
432  operand->size = resolve_operand_size(u, size);
433
434  if (type == T_GPR) {
435    operand->base = decode_gpr(u, operand->size, reg);
436  } else {
437    operand->base = resolve_reg(u, type, reg);
438  }
439}
440
441
442/*
443 * decode_modrm_rm
444 *
445 *    Decodes rm field of mod/rm byte
446 *
447 */
448static void
449decode_modrm_rm(struct ud         *u,
450                struct ud_operand *op,
451                unsigned char      type,
452                unsigned int       size)
453
454{
455  unsigned char mod, rm, reg;
456
457  /* get mod, r/m and reg fields */
458  mod = MODRM_MOD(modrm(u));
459  rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
460  reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
461
462  op->size = resolve_operand_size(u, size);
463
464  /*
465   * If mod is 11b, then the modrm.rm specifies a register.
466   *
467   */
468  if (mod == 3) {
469    op->type = UD_OP_REG;
470    if (type ==  T_GPR) {
471      op->base = decode_gpr(u, op->size, rm);
472    } else {
473      op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7));
474    }
475    return;
476  }
477
478
479  /*
480   * !11 => Memory Address
481   */
482  op->type = UD_OP_MEM;
483
484  if (u->adr_mode == 64) {
485    op->base = UD_R_RAX + rm;
486    if (mod == 1) {
487      op->offset = 8;
488    } else if (mod == 2) {
489      op->offset = 32;
490    } else if (mod == 0 && (rm & 7) == 5) {
491      op->base = UD_R_RIP;
492      op->offset = 32;
493    } else {
494      op->offset = 0;
495    }
496    /*
497     * Scale-Index-Base (SIB)
498     */
499    if ((rm & 7) == 4) {
500      ud_inp_next(u);
501
502      op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
503      op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
504      op->base  = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
505
506      /* special conditions for base reference */
507      if (op->index == UD_R_RSP) {
508        op->index = UD_NONE;
509        op->scale = UD_NONE;
510      }
511
512      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
513        if (mod == 0) {
514          op->base = UD_NONE;
515        }
516        if (mod == 1) {
517          op->offset = 8;
518        } else {
519          op->offset = 32;
520        }
521      }
522    }
523  } else if (u->adr_mode == 32) {
524    op->base = UD_R_EAX + rm;
525    if (mod == 1) {
526      op->offset = 8;
527    } else if (mod == 2) {
528      op->offset = 32;
529    } else if (mod == 0 && rm == 5) {
530      op->base = UD_NONE;
531      op->offset = 32;
532    } else {
533      op->offset = 0;
534    }
535
536    /* Scale-Index-Base (SIB) */
537    if ((rm & 7) == 4) {
538      ud_inp_next(u);
539
540      op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
541      op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
542      op->base  = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
543
544      if (op->index == UD_R_ESP) {
545        op->index = UD_NONE;
546        op->scale = UD_NONE;
547      }
548
549      /* special condition for base reference */
550      if (op->base == UD_R_EBP) {
551        if (mod == 0) {
552          op->base = UD_NONE;
553        }
554        if (mod == 1) {
555          op->offset = 8;
556        } else {
557          op->offset = 32;
558        }
559      }
560    }
561  } else {
562    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
563                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
564    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
565                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
566    op->base  = bases[rm & 7];
567    op->index = indices[rm & 7];
568    if (mod == 0 && rm == 6) {
569      op->offset= 16;
570      op->base = UD_NONE;
571    } else if (mod == 1) {
572      op->offset = 8;
573    } else if (mod == 2) {
574      op->offset = 16;
575    }
576  }
577
578  /*
579   * extract offset, if any
580   */
581  switch (op->offset) {
582    case 8 : op->lval.ubyte  = ud_inp_uint8(u);  break;
583    case 16: op->lval.uword  = ud_inp_uint16(u); break;
584    case 32: op->lval.udword = ud_inp_uint32(u); break;
585    case 64: op->lval.uqword = ud_inp_uint64(u); break;
586    default: break;
587  }
588}
589
590/* -----------------------------------------------------------------------------
591 * decode_o() - Decodes offset
592 * -----------------------------------------------------------------------------
593 */
594static void
595decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
596{
597  switch (u->adr_mode) {
598    case 64:
599        op->offset = 64;
600        op->lval.uqword = ud_inp_uint64(u);
601        break;
602    case 32:
603        op->offset = 32;
604        op->lval.udword = ud_inp_uint32(u);
605        break;
606    case 16:
607        op->offset = 16;
608        op->lval.uword  = ud_inp_uint16(u);
609        break;
610    default:
611        return;
612  }
613  op->type = UD_OP_MEM;
614  op->size = resolve_operand_size(u, s);
615}
616
617/* -----------------------------------------------------------------------------
618 * decode_operands() - Disassembles Operands.
619 * -----------------------------------------------------------------------------
620 */
621static int
622decode_operand(struct ud           *u,
623               struct ud_operand   *operand,
624               enum ud_operand_code type,
625               unsigned int         size)
626{
627  switch (type) {
628    case OP_A :
629      decode_a(u, operand);
630      break;
631    case OP_MR:
632      if (MODRM_MOD(modrm(u)) == 3) {
633        decode_modrm_rm(u, operand, T_GPR,
634                        size == SZ_DY ? SZ_MDQ : SZ_V);
635      } else if (size == SZ_WV) {
636        decode_modrm_rm( u, operand, T_GPR, SZ_W);
637      } else if (size == SZ_BV) {
638        decode_modrm_rm( u, operand, T_GPR, SZ_B);
639      } else if (size == SZ_DY) {
640        decode_modrm_rm( u, operand, T_GPR, SZ_D);
641      } else {
642        ASSERT(!"unexpected size");
643      }
644      break;
645    case OP_M:
646      if (MODRM_MOD(modrm(u)) == 3) {
647          u->error = 1;
648      }
649      /* intended fall through */
650    case OP_E:
651      decode_modrm_rm(u, operand, T_GPR, size);
652      break;
653      break;
654    case OP_G:
655      decode_modrm_reg(u, operand, T_GPR, size);
656      break;
657    case OP_I:
658      decode_imm(u, size, operand);
659      break;
660    case OP_I1:
661      operand->type = UD_OP_CONST;
662      operand->lval.udword = 1;
663      break;
664    case OP_PR:
665      if (MODRM_MOD(modrm(u)) != 3) {
666          u->error = 1;
667      }
668      decode_modrm_rm(u, operand, T_MMX, size);
669      break;
670    case OP_P:
671      decode_modrm_reg(u, operand, T_MMX, size);
672      break;
673    case OP_VR:
674      if (MODRM_MOD(modrm(u)) != 3) {
675          u->error = 1;
676      }
677      /* intended fall through */
678    case OP_W:
679      decode_modrm_rm(u, operand, T_XMM, size);
680      break;
681    case OP_V:
682      decode_modrm_reg(u, operand, T_XMM, size);
683      break;
684    case OP_S:
685      decode_modrm_reg(u, operand, T_SEG, size);
686      break;
687    case OP_AL:
688    case OP_CL:
689    case OP_DL:
690    case OP_BL:
691    case OP_AH:
692    case OP_CH:
693    case OP_DH:
694    case OP_BH:
695      operand->type = UD_OP_REG;
696      operand->base = UD_R_AL + (type - OP_AL);
697      operand->size = 8;
698      break;
699    case OP_DX:
700      operand->type = UD_OP_REG;
701      operand->base = UD_R_DX;
702      operand->size = 16;
703      break;
704    case OP_O:
705      decode_o(u, size, operand);
706      break;
707    case OP_rAXr8:
708    case OP_rCXr9:
709    case OP_rDXr10:
710    case OP_rBXr11:
711    case OP_rSPr12:
712    case OP_rBPr13:
713    case OP_rSIr14:
714    case OP_rDIr15:
715    case OP_rAX:
716    case OP_rCX:
717    case OP_rDX:
718    case OP_rBX:
719    case OP_rSP:
720    case OP_rBP:
721    case OP_rSI:
722    case OP_rDI:
723      operand->type = UD_OP_REG;
724      operand->base = resolve_gpr64(u, type, &operand->size);
725      break;
726    case OP_ALr8b:
727    case OP_CLr9b:
728    case OP_DLr10b:
729    case OP_BLr11b:
730    case OP_AHr12b:
731    case OP_CHr13b:
732    case OP_DHr14b:
733    case OP_BHr15b: {
734      ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL
735                        + (REX_B(u->pfx_rex) << 3);
736      if (UD_R_AH <= gpr && u->pfx_rex) {
737        gpr = gpr + 4;
738      }
739      operand->type = UD_OP_REG;
740      operand->base = gpr;
741      break;
742    }
743    case OP_eAX:
744    case OP_eCX:
745    case OP_eDX:
746    case OP_eBX:
747    case OP_eSP:
748    case OP_eBP:
749    case OP_eSI:
750    case OP_eDI:
751      operand->type = UD_OP_REG;
752      operand->base = resolve_gpr32(u, type);
753      operand->size = u->opr_mode == 16 ? 16 : 32;
754      break;
755    case OP_ES:
756    case OP_CS:
757    case OP_DS:
758    case OP_SS:
759    case OP_FS:
760    case OP_GS:
761      /* in 64bits mode, only fs and gs are allowed */
762      if (u->dis_mode == 64) {
763        if (type != OP_FS && type != OP_GS) {
764          u->error= 1;
765        }
766      }
767      operand->type = UD_OP_REG;
768      operand->base = (type - OP_ES) + UD_R_ES;
769      operand->size = 16;
770      break;
771    case OP_J :
772      decode_imm(u, size, operand);
773      operand->type = UD_OP_JIMM;
774      break ;
775    case OP_Q:
776      decode_modrm_rm(u, operand, T_MMX, size);
777      break;
778    case OP_R :
779      decode_modrm_rm(u, operand, T_GPR, size);
780      break;
781    case OP_C:
782      decode_modrm_reg(u, operand, T_CRG, size);
783      break;
784    case OP_D:
785      decode_modrm_reg(u, operand, T_DBG, size);
786      break;
787    case OP_I3 :
788      operand->type = UD_OP_CONST;
789      operand->lval.sbyte = 3;
790      break;
791    case OP_ST0:
792    case OP_ST1:
793    case OP_ST2:
794    case OP_ST3:
795    case OP_ST4:
796    case OP_ST5:
797    case OP_ST6:
798    case OP_ST7:
799      operand->type = UD_OP_REG;
800      operand->base = (type - OP_ST0) + UD_R_ST0;
801      operand->size = 0;
802      break;
803    case OP_AX:
804      operand->type = UD_OP_REG;
805      operand->base = UD_R_AX;
806      operand->size = 16;
807      break;
808    default :
809      operand->type = UD_NONE;
810      break;
811  }
812  return 0;
813}
814
815
816/*
817 * decode_operands
818 *
819 *    Disassemble upto 3 operands of the current instruction being
820 *    disassembled. By the end of the function, the operand fields
821 *    of the ud structure will have been filled.
822 */
823static int
824decode_operands(struct ud* u)
825{
826  decode_operand(u, &u->operand[0],
827                    u->itab_entry->operand1.type,
828                    u->itab_entry->operand1.size);
829  decode_operand(u, &u->operand[1],
830                    u->itab_entry->operand2.type,
831                    u->itab_entry->operand2.size);
832  decode_operand(u, &u->operand[2],
833                    u->itab_entry->operand3.type,
834                    u->itab_entry->operand3.size);
835  return 0;
836}
837
838/* -----------------------------------------------------------------------------
839 * clear_insn() - clear instruction structure
840 * -----------------------------------------------------------------------------
841 */
842static void
843clear_insn(register struct ud* u)
844{
845  u->error     = 0;
846  u->pfx_seg   = 0;
847  u->pfx_opr   = 0;
848  u->pfx_adr   = 0;
849  u->pfx_lock  = 0;
850  u->pfx_repne = 0;
851  u->pfx_rep   = 0;
852  u->pfx_repe  = 0;
853  u->pfx_rex   = 0;
854  u->pfx_insn  = 0;
855  u->mnemonic  = UD_Inone;
856  u->itab_entry = NULL;
857  u->have_modrm = 0;
858
859  memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
860  memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
861  memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
862}
863
864static int
865resolve_mode( struct ud* u )
866{
867  /* if in error state, bail out */
868  if ( u->error ) return -1;
869
870  /* propagate prefix effects */
871  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
872
873    /* Check validity of  instruction m64 */
874    if ( P_INV64( u->itab_entry->prefix ) ) {
875        u->error = 1;
876        return -1;
877    }
878
879    /* effective rex prefix is the  effective mask for the
880     * instruction hard-coded in the opcode map.
881     */
882    u->pfx_rex = ( u->pfx_rex & 0x40 ) |
883                 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
884
885    /* whether this instruction has a default operand size of
886     * 64bit, also hardcoded into the opcode map.
887     */
888    u->default64 = P_DEF64( u->itab_entry->prefix );
889    /* calculate effective operand size */
890    if ( REX_W( u->pfx_rex ) ) {
891        u->opr_mode = 64;
892    } else if ( u->pfx_opr ) {
893        u->opr_mode = 16;
894    } else {
895        /* unless the default opr size of instruction is 64,
896         * the effective operand size in the absence of rex.w
897         * prefix is 32.
898         */
899        u->opr_mode = ( u->default64 ) ? 64 : 32;
900    }
901
902    /* calculate effective address size */
903    u->adr_mode = (u->pfx_adr) ? 32 : 64;
904  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
905    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
906    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
907  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
908    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
909    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
910  }
911
912  /* These flags determine which operand to apply the operand size
913   * cast to.
914   */
915  u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
916  u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
917  u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
918
919  /* set flags for implicit addressing */
920  u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
921
922  return 0;
923}
924
925static int gen_hex( struct ud *u )
926{
927  unsigned int i;
928  unsigned char *src_ptr = ud_inp_sess( u );
929  char* src_hex;
930
931  /* bail out if in error stat. */
932  if ( u->error ) return -1;
933  /* output buffer pointe */
934  src_hex = ( char* ) u->insn_hexcode;
935  /* for each byte used to decode instruction */
936  for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
937    sprintf( src_hex, "%02x", *src_ptr & 0xFF );
938    src_hex += 2;
939  }
940  return 0;
941}
942
943
944static inline int
945decode_insn(struct ud *u, uint16_t ptr)
946{
947  ASSERT((ptr & 0x8000) == 0);
948  u->itab_entry = &ud_itab[ ptr ];
949  u->mnemonic = u->itab_entry->mnemonic;
950  return (resolve_mode(u)     == 0 &&
951          decode_operands(u)  == 0 &&
952          resolve_mnemonic(u) == 0) ? 0 : -1;
953}
954
955
956/*
957 * decode_3dnow()
958 *
959 *    Decoding 3dnow is a little tricky because of its strange opcode
960 *    structure. The final opcode disambiguation depends on the last
961 *    byte that comes after the operands have been decoded. Fortunately,
962 *    all 3dnow instructions have the same set of operand types. So we
963 *    go ahead and decode the instruction by picking an arbitrarily chosen
964 *    valid entry in the table, decode the operands, and read the final
965 *    byte to resolve the menmonic.
966 */
967static inline int
968decode_3dnow(struct ud* u)
969{
970  uint16_t ptr;
971  ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
972  ASSERT(u->le->table[0xc] != 0);
973  decode_insn(u, u->le->table[0xc]);
974  ud_inp_next(u);
975  if (u->error) {
976    return -1;
977  }
978  ptr = u->le->table[ud_inp_curr(u)];
979  ASSERT((ptr & 0x8000) == 0);
980  u->mnemonic = ud_itab[ptr].mnemonic;
981  return 0;
982}
983
984
985static int
986decode_ssepfx(struct ud *u)
987{
988  uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2;
989  if (u->le->table[idx] == 0) {
990    idx = 0;
991  }
992  if (idx && u->le->table[idx] != 0) {
993    /*
994     * "Consume" the prefix as a part of the opcode, so it is no
995     * longer exported as an instruction prefix.
996     */
997    switch (u->pfx_insn) {
998      case 0xf2:
999        u->pfx_repne = 0;
1000        break;
1001      case 0xf3:
1002        u->pfx_rep = 0;
1003        u->pfx_repe = 0;
1004        break;
1005      case 0x66:
1006        u->pfx_opr = 0;
1007        break;
1008    }
1009  }
1010  return decode_ext(u, u->le->table[idx]);
1011}
1012
1013
1014/*
1015 * decode_ext()
1016 *
1017 *    Decode opcode extensions (if any)
1018 */
1019static int
1020decode_ext(struct ud *u, uint16_t ptr)
1021{
1022  uint8_t idx = 0;
1023  if ((ptr & 0x8000) == 0) {
1024    return decode_insn(u, ptr);
1025  }
1026  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1027  if (u->le->type == UD_TAB__OPC_3DNOW) {
1028    return decode_3dnow(u);
1029  }
1030
1031  switch (u->le->type) {
1032    case UD_TAB__OPC_MOD:
1033      /* !11 = 0, 11 = 1 */
1034      idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1035      break;
1036      /* disassembly mode/operand size/address size based tables.
1037       * 16 = 0,, 32 = 1, 64 = 2
1038       */
1039    case UD_TAB__OPC_MODE:
1040      idx = u->dis_mode / 32;
1041      break;
1042    case UD_TAB__OPC_OSIZE:
1043      idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1044      break;
1045    case UD_TAB__OPC_ASIZE:
1046      idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1047      break;
1048    case UD_TAB__OPC_X87:
1049      idx = modrm(u) - 0xC0;
1050      break;
1051    case UD_TAB__OPC_VENDOR:
1052      if (u->vendor == UD_VENDOR_ANY) {
1053        /* choose a valid entry */
1054        idx = (u->le->table[idx] != 0) ? 0 : 1;
1055      } else if (u->vendor == UD_VENDOR_AMD) {
1056        idx = 0;
1057      } else {
1058        idx = 1;
1059      }
1060      break;
1061    case UD_TAB__OPC_RM:
1062      idx = MODRM_RM(modrm(u));
1063      break;
1064    case UD_TAB__OPC_REG:
1065      idx = MODRM_REG(modrm(u));
1066      break;
1067    case UD_TAB__OPC_SSE:
1068      return decode_ssepfx(u);
1069    default:
1070      ASSERT(!"not reached");
1071      break;
1072  }
1073
1074  return decode_ext(u, u->le->table[idx]);
1075}
1076
1077
1078static inline int
1079decode_opcode(struct ud *u)
1080{
1081  uint16_t ptr;
1082  ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1083  ud_inp_next(u);
1084  if (u->error) {
1085    return -1;
1086  }
1087  ptr = u->le->table[ud_inp_curr(u)];
1088  if (ptr & 0x8000) {
1089    u->le = &ud_lookup_table_list[ptr & ~0x8000];
1090    if (u->le->type == UD_TAB__OPC_TABLE) {
1091      return decode_opcode(u);
1092    }
1093  }
1094  return decode_ext(u, ptr);
1095}
1096
1097
1098/* =============================================================================
1099 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1100 * =============================================================================
1101 */
1102unsigned int
1103ud_decode(struct ud *u)
1104{
1105  ud_inp_start(u);
1106  clear_insn(u);
1107  u->le = &ud_lookup_table_list[0];
1108  u->error = decode_prefixes(u) == -1 ||
1109             decode_opcode(u)   == -1 ||
1110             u->error;
1111  /* Handle decode error. */
1112  if (u->error) {
1113    /* clear out the decode data. */
1114    clear_insn(u);
1115    /* mark the sequence of bytes as invalid. */
1116    u->itab_entry = & s_ie__invalid;
1117    u->mnemonic = u->itab_entry->mnemonic;
1118  }
1119
1120    /* maybe this stray segment override byte
1121     * should be spewed out?
1122     */
1123    if ( !P_SEG( u->itab_entry->prefix ) &&
1124            u->operand[0].type != UD_OP_MEM &&
1125            u->operand[1].type != UD_OP_MEM )
1126        u->pfx_seg = 0;
1127
1128  u->insn_offset = u->pc; /* set offset of instruction */
1129  u->insn_fill = 0;   /* set translation buffer index to 0 */
1130  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
1131  gen_hex( u );       /* generate hex code */
1132
1133  /* return number of bytes disassembled. */
1134  return u->inp_ctr;
1135}
1136
1137/*
1138vim: set ts=2 sw=2 expandtab
1139*/
1140
1141#endif // USE(UDIS86)
1142