/* X86DisassemblerDecoder.h revision 218893 */
/*===- X86DisassemblerDecoder.h - Disassembler decoder -------------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the public interface of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/
15
16#ifndef X86DISASSEMBLERDECODER_H
17#define X86DISASSEMBLERDECODER_H
18
19#ifdef __cplusplus
20extern "C" {
21#endif
22
/*
 * Hook macros that must be defined before X86DisassemblerDecoderCommon.h is
 * included; this header undefines both again right after that include.
 * Each one expands to a complete member declaration, including the trailing
 * semicolon, so it is meant to be dropped into a struct body.
 * NOTE(review): the structs that consume these hooks live in the common
 * header, which is not visible from here - confirm there before changing them.
 */

/* Extra per-instruction field: a pointer to the instruction's name string. */
#define INSTRUCTION_SPECIFIER_FIELDS  \
  const char *name;

/* Member holding a pointer to read-only InstrUID values. */
#define INSTRUCTION_IDS     \
  const InstrUID *instructionIDs;
28
29#include "X86DisassemblerDecoderCommon.h"
30
31#undef INSTRUCTION_SPECIFIER_FIELDS
32#undef INSTRUCTION_IDS
33
/*
 * Accessor macros for the bit fields of an Intel instruction's ModR/M, SIB
 * and REX bytes.  Each macro masks out one field and shifts it down so the
 * result starts at bit 0.
 *
 * The argument is fully parenthesized inside every expansion, so passing an
 * expression such as (a | b) yields the field of the whole expression rather
 * than being regrouped by operator precedence.  The argument is evaluated
 * exactly once.
 */

/* ModR/M byte: mod = bits 7-6, reg = bits 5-3, r/m = bits 2-0. */
#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM)   ((modRM) & 0x7)

/* SIB byte: scale = bits 7-6, index = bits 5-3, base = bits 2-0. */
#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
#define baseFromSIB(sib)     ((sib) & 0x7)

/* REX prefix: W = bit 3, R = bit 2, X = bit 1, B = bit 0. */
#define wFromREX(rex)        (((rex) & 0x8) >> 3)
#define rFromREX(rex)        (((rex) & 0x4) >> 2)
#define xFromREX(rex)        (((rex) & 0x2) >> 1)
#define bFromREX(rex)        ((rex) & 0x1)
47
/*
 * Register lists used to build the decoder's Intel register enums.
 *
 * Every list is an X-macro: it expands to a run of ENTRY(name) invocations.
 * The user defines ENTRY immediately before expanding a list and undefines
 * it afterwards; the enums in this header turn each entry into one
 * enumerator that way.  Because the generated enumerators are numbered in
 * expansion order, the order of the entries is significant and must not be
 * changed.
 */

/* 8-bit registers: 20 entries. */
#define REGS_8BIT     \
  ENTRY(AL)           \
  ENTRY(CL)           \
  ENTRY(DL)           \
  ENTRY(BL)           \
  ENTRY(AH)           \
  ENTRY(CH)           \
  ENTRY(DH)           \
  ENTRY(BH)           \
  ENTRY(R8B)          \
  ENTRY(R9B)          \
  ENTRY(R10B)         \
  ENTRY(R11B)         \
  ENTRY(R12B)         \
  ENTRY(R13B)         \
  ENTRY(R14B)         \
  ENTRY(R15B)         \
  ENTRY(SPL)          \
  ENTRY(BPL)          \
  ENTRY(SIL)          \
  ENTRY(DIL)

/* Effective-address bases for 16-bit addressing: 16 entries. */
#define EA_BASES_16BIT  \
  ENTRY(BX_SI)          \
  ENTRY(BX_DI)          \
  ENTRY(BP_SI)          \
  ENTRY(BP_DI)          \
  ENTRY(SI)             \
  ENTRY(DI)             \
  ENTRY(BP)             \
  ENTRY(BX)             \
  ENTRY(R8W)            \
  ENTRY(R9W)            \
  ENTRY(R10W)           \
  ENTRY(R11W)           \
  ENTRY(R12W)           \
  ENTRY(R13W)           \
  ENTRY(R14W)           \
  ENTRY(R15W)

/* 16-bit registers: 16 entries. */
#define REGS_16BIT    \
  ENTRY(AX)           \
  ENTRY(CX)           \
  ENTRY(DX)           \
  ENTRY(BX)           \
  ENTRY(SP)           \
  ENTRY(BP)           \
  ENTRY(SI)           \
  ENTRY(DI)           \
  ENTRY(R8W)          \
  ENTRY(R9W)          \
  ENTRY(R10W)         \
  ENTRY(R11W)         \
  ENTRY(R12W)         \
  ENTRY(R13W)         \
  ENTRY(R14W)         \
  ENTRY(R15W)

/*
 * Effective-address bases for 32-bit addressing.  Same as REGS_32BIT except
 * that the fifth slot holds the placeholder entry "sib" instead of ESP.
 */
#define EA_BASES_32BIT  \
  ENTRY(EAX)            \
  ENTRY(ECX)            \
  ENTRY(EDX)            \
  ENTRY(EBX)            \
  ENTRY(sib)            \
  ENTRY(EBP)            \
  ENTRY(ESI)            \
  ENTRY(EDI)            \
  ENTRY(R8D)            \
  ENTRY(R9D)            \
  ENTRY(R10D)           \
  ENTRY(R11D)           \
  ENTRY(R12D)           \
  ENTRY(R13D)           \
  ENTRY(R14D)           \
  ENTRY(R15D)

/* 32-bit registers: 16 entries. */
#define REGS_32BIT  \
  ENTRY(EAX)        \
  ENTRY(ECX)        \
  ENTRY(EDX)        \
  ENTRY(EBX)        \
  ENTRY(ESP)        \
  ENTRY(EBP)        \
  ENTRY(ESI)        \
  ENTRY(EDI)        \
  ENTRY(R8D)        \
  ENTRY(R9D)        \
  ENTRY(R10D)       \
  ENTRY(R11D)       \
  ENTRY(R12D)       \
  ENTRY(R13D)       \
  ENTRY(R14D)       \
  ENTRY(R15D)

/*
 * Effective-address bases for 64-bit addressing.  Same as REGS_64BIT except
 * that the fifth slot holds the placeholder entry "sib64" instead of RSP.
 */
#define EA_BASES_64BIT  \
  ENTRY(RAX)            \
  ENTRY(RCX)            \
  ENTRY(RDX)            \
  ENTRY(RBX)            \
  ENTRY(sib64)          \
  ENTRY(RBP)            \
  ENTRY(RSI)            \
  ENTRY(RDI)            \
  ENTRY(R8)             \
  ENTRY(R9)             \
  ENTRY(R10)            \
  ENTRY(R11)            \
  ENTRY(R12)            \
  ENTRY(R13)            \
  ENTRY(R14)            \
  ENTRY(R15)

/* 64-bit registers: 16 entries. */
#define REGS_64BIT  \
  ENTRY(RAX)        \
  ENTRY(RCX)        \
  ENTRY(RDX)        \
  ENTRY(RBX)        \
  ENTRY(RSP)        \
  ENTRY(RBP)        \
  ENTRY(RSI)        \
  ENTRY(RDI)        \
  ENTRY(R8)         \
  ENTRY(R9)         \
  ENTRY(R10)        \
  ENTRY(R11)        \
  ENTRY(R12)        \
  ENTRY(R13)        \
  ENTRY(R14)        \
  ENTRY(R15)

/* MMX registers: 8 entries. */
#define REGS_MMX  \
  ENTRY(MM0)      \
  ENTRY(MM1)      \
  ENTRY(MM2)      \
  ENTRY(MM3)      \
  ENTRY(MM4)      \
  ENTRY(MM5)      \
  ENTRY(MM6)      \
  ENTRY(MM7)

/* XMM registers: 16 entries. */
#define REGS_XMM  \
  ENTRY(XMM0)     \
  ENTRY(XMM1)     \
  ENTRY(XMM2)     \
  ENTRY(XMM3)     \
  ENTRY(XMM4)     \
  ENTRY(XMM5)     \
  ENTRY(XMM6)     \
  ENTRY(XMM7)     \
  ENTRY(XMM8)     \
  ENTRY(XMM9)     \
  ENTRY(XMM10)    \
  ENTRY(XMM11)    \
  ENTRY(XMM12)    \
  ENTRY(XMM13)    \
  ENTRY(XMM14)    \
  ENTRY(XMM15)

/* Segment registers: 6 entries. */
#define REGS_SEGMENT \
  ENTRY(ES)          \
  ENTRY(CS)          \
  ENTRY(SS)          \
  ENTRY(DS)          \
  ENTRY(FS)          \
  ENTRY(GS)

/* Debug registers: 8 entries. */
#define REGS_DEBUG  \
  ENTRY(DR0)        \
  ENTRY(DR1)        \
  ENTRY(DR2)        \
  ENTRY(DR3)        \
  ENTRY(DR4)        \
  ENTRY(DR5)        \
  ENTRY(DR6)        \
  ENTRY(DR7)

/* Control registers: 9 entries (CR0 through CR8). */
#define REGS_CONTROL  \
  ENTRY(CR0)          \
  ENTRY(CR1)          \
  ENTRY(CR2)          \
  ENTRY(CR3)          \
  ENTRY(CR4)          \
  ENTRY(CR5)          \
  ENTRY(CR6)          \
  ENTRY(CR7)          \
  ENTRY(CR8)

/* All effective-address bases: the 16-, 32- and 64-bit lists, in that order. */
#define ALL_EA_BASES  \
  EA_BASES_16BIT      \
  EA_BASES_32BIT      \
  EA_BASES_64BIT

/* All SIB bases: the 32-bit registers followed by the 64-bit registers. */
#define ALL_SIB_BASES \
  REGS_32BIT          \
  REGS_64BIT

/* Every register list above, in the order shown, followed by RIP. */
#define ALL_REGS      \
  REGS_8BIT           \
  REGS_16BIT          \
  REGS_32BIT          \
  REGS_64BIT          \
  REGS_MMX            \
  REGS_XMM            \
  REGS_SEGMENT        \
  REGS_DEBUG          \
  REGS_CONTROL        \
  ENTRY(RIP)
259
260/*
261 * EABase - All possible values of the base field for effective-address
262 *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
263 *   distinguish between bases (EA_BASE_*) and registers that just happen to be
264 *   referred to when Mod == 0b11 (EA_REG_*).
265 */
266typedef enum {
267  EA_BASE_NONE,
268#define ENTRY(x) EA_BASE_##x,
269  ALL_EA_BASES
270#undef ENTRY
271#define ENTRY(x) EA_REG_##x,
272  ALL_REGS
273#undef ENTRY
274  EA_max
275} EABase;
276
277/*
278 * SIBIndex - All possible values of the SIB index field.
279 *   Borrows entries from ALL_EA_BASES with the special case that
280 *   sib is synonymous with NONE.
281 */
282typedef enum {
283  SIB_INDEX_NONE,
284#define ENTRY(x) SIB_INDEX_##x,
285  ALL_EA_BASES
286#undef ENTRY
287  SIB_INDEX_max
288} SIBIndex;
289
290/*
291 * SIBBase - All possible values of the SIB base field.
292 */
293typedef enum {
294  SIB_BASE_NONE,
295#define ENTRY(x) SIB_BASE_##x,
296  ALL_SIB_BASES
297#undef ENTRY
298  SIB_BASE_max
299} SIBBase;
300
/*
 * EADisplacement - Possible displacement types for effective-address
 *   computations.  The numeric suffix is the displacement width in bits.
 */
typedef enum {
  EA_DISP_NONE,   /* no displacement */
  EA_DISP_8,      /* 8-bit displacement */
  EA_DISP_16,     /* 16-bit displacement */
  EA_DISP_32      /* 32-bit displacement */
} EADisplacement;
311
312/*
313 * Reg - All possible values of the reg field in the ModR/M byte.
314 */
315typedef enum {
316#define ENTRY(x) MODRM_REG_##x,
317  ALL_REGS
318#undef ENTRY
319  MODRM_REG_max
320} Reg;
321
/*
 * SegmentOverride - All possible segment overrides.
 *   SEG_OVERRIDE_NONE is 0; SEG_OVERRIDE_max is one past the last real
 *   override and is not itself an override.
 */
typedef enum {
  SEG_OVERRIDE_NONE,
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max
} SegmentOverride;
335
/* BOOL - One-byte flag type used for the consumed___ fields of the decoder. */
typedef uint8_t BOOL;
337
/*
 * byteReader_t - Type for the byte reader that the consumer must provide to
 *   the decoder.  Reads a single byte from the instruction's address space.
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param byte    - A pointer to a single byte in memory that should be set to
 *                  contain the value at address.
 * @param address - The address in the instruction's address space that should
 *                  be read from.
 * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(void *arg, uint8_t *byte, uint64_t address);
350
/*
 * dlog_t - Type for the logging function that the consumer can provide to
 *   get debugging output from the decoder.
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param log     - A string that contains the message.  Will be reused after
 *                  the logger returns, so the logger must copy it if it needs
 *                  to keep the text.
 */
typedef void (*dlog_t)(void *arg, const char *log);
360
361/*
362 * The x86 internal instruction, which is produced by the decoder.
363 */
364struct InternalInstruction {
365  /* Reader interface (C) */
366  byteReader_t reader;
367  /* Opaque value passed to the reader */
368  void* readerArg;
369  /* The address of the next byte to read via the reader */
370  uint64_t readerCursor;
371
372  /* Logger interface (C) */
373  dlog_t dlog;
374  /* Opaque value passed to the logger */
375  void* dlogArg;
376
377  /* General instruction information */
378
379  /* The mode to disassemble for (64-bit, protected, real) */
380  DisassemblerMode mode;
381  /* The start of the instruction, usable with the reader */
382  uint64_t startLocation;
383  /* The length of the instruction, in bytes */
384  size_t length;
385
386  /* Prefix state */
387
388  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
389  uint8_t prefixPresent[0x100];
390  /* contains the location (for use with the reader) of the prefix byte */
391  uint64_t prefixLocations[0x100];
392  /* The value of the REX prefix, if present */
393  uint8_t rexPrefix;
394  /* The location of the REX prefix */
395  uint64_t rexLocation;
396  /* The location where a mandatory prefix would have to be (i.e., right before
397     the opcode, or right before the REX prefix if one is present) */
398  uint64_t necessaryPrefixLocation;
399  /* The segment override type */
400  SegmentOverride segmentOverride;
401
402  /* Sizes of various critical pieces of data */
403  uint8_t registerSize;
404  uint8_t addressSize;
405  uint8_t displacementSize;
406  uint8_t immediateSize;
407
408  /* opcode state */
409
410  /* The value of the two-byte escape prefix (usually 0x0f) */
411  uint8_t twoByteEscape;
412  /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
413  uint8_t threeByteEscape;
414  /* The last byte of the opcode, not counting any ModR/M extension */
415  uint8_t opcode;
416  /* The ModR/M byte of the instruction, if it is an opcode extension */
417  uint8_t modRMExtension;
418
419  /* decode state */
420
421  /* The type of opcode, used for indexing into the array of decode tables */
422  OpcodeType opcodeType;
423  /* The instruction ID, extracted from the decode table */
424  uint16_t instructionID;
425  /* The specifier for the instruction, from the instruction info table */
426  const struct InstructionSpecifier *spec;
427
428  /* state for additional bytes, consumed during operand decode.  Pattern:
429     consumed___ indicates that the byte was already consumed and does not
430     need to be consumed again */
431
432  /* The ModR/M byte, which contains most register operands and some portion of
433     all memory operands */
434  BOOL                          consumedModRM;
435  uint8_t                       modRM;
436
437  /* The SIB byte, used for more complex 32- or 64-bit memory operands */
438  BOOL                          consumedSIB;
439  uint8_t                       sib;
440
441  /* The displacement, used for memory operands */
442  BOOL                          consumedDisplacement;
443  int32_t                       displacement;
444
445  /* Immediates.  There can be two in some cases */
446  uint8_t                       numImmediatesConsumed;
447  uint8_t                       numImmediatesTranslated;
448  uint64_t                      immediates[2];
449
450  /* A register or immediate operand encoded into the opcode */
451  BOOL                          consumedOpcodeModifier;
452  uint8_t                       opcodeModifier;
453  Reg                           opcodeRegister;
454
455  /* Portions of the ModR/M byte */
456
457  /* These fields determine the allowable values for the ModR/M fields, which
458     depend on operand and address widths */
459  EABase                        eaBaseBase;
460  EABase                        eaRegBase;
461  Reg                           regBase;
462
463  /* The Mod and R/M fields can encode a base for an effective address, or a
464     register.  These are separated into two fields here */
465  EABase                        eaBase;
466  EADisplacement                eaDisplacement;
467  /* The reg field always encodes a register */
468  Reg                           reg;
469
470  /* SIB state */
471  SIBIndex                      sibIndex;
472  uint8_t                       sibScale;
473  SIBBase                       sibBase;
474};
475
476/* decodeInstruction - Decode one instruction and store the decoding results in
477 *   a buffer provided by the consumer.
478 * @param insn      - The buffer to store the instruction in.  Allocated by the
479 *                    consumer.
480 * @param reader    - The byteReader_t for the bytes to be read.
481 * @param readerArg - An argument to pass to the reader for storing context
482 *                    specific to the consumer.  May be NULL.
483 * @param logger    - The dlog_t to be used in printing status messages from the
484 *                    disassembler.  May be NULL.
485 * @param loggerArg - An argument to pass to the logger for storing context
486 *                    specific to the logger.  May be NULL.
487 * @param startLoc  - The address (in the reader's address space) of the first
488 *                    byte in the instruction.
489 * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
490 * @return          - Nonzero if there was an error during decode, 0 otherwise.
491 */
492int decodeInstruction(struct InternalInstruction* insn,
493                      byteReader_t reader,
494                      void* readerArg,
495                      dlog_t logger,
496                      void* loggerArg,
497                      uint64_t startLoc,
498                      DisassemblerMode mode);
499
/* x86DisassemblerDebug - C-accessible function for printing a message to
 *   debugs()
 * @param file  - The name of the file printing the debug message.
 * @param line  - The line number that printed the debug message.
 * @param s     - The message to print.
 */
void x86DisassemblerDebug(const char *file,
                          unsigned line,
                          const char *s);
510
511#ifdef __cplusplus
512}
513#endif
514
515#endif
516