/*===-- X86DisassemblerDecoder.h - Disassembler decoder -----------*- C -*-===*
2 *
3 *                     The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains the public interface of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
16#ifndef X86DISASSEMBLERDECODER_H
17#define X86DISASSEMBLERDECODER_H
18
19#ifdef __cplusplus
20extern "C" {
21#endif
22
/*
 * Extra member declarations handed to X86DisassemblerDecoderCommon.h.  Both
 * macros are defined right before that header is included and are undefined
 * again immediately afterwards, so they are only visible to that header.
 * NOTE(review): presumably the common header pastes them into its
 * instruction-specifier and instruction-ID structures -- confirm there.
 */
#define INSTRUCTION_SPECIFIER_FIELDS uint16_t operands;

#define INSTRUCTION_IDS uint16_t instructionIDs;
28
29#include "X86DisassemblerDecoderCommon.h"
30
31#undef INSTRUCTION_SPECIFIER_FIELDS
32#undef INSTRUCTION_IDS
33
/*
 * Field extractors for the ModR/M, SIB and REX bytes of an Intel instruction.
 * Every macro moves the requested field down to bit 0 and masks off the rest.
 *
 *   ModR/M:  mod   = bits 7-6   reg   = bits 5-3   r/m  = bits 2-0
 *   SIB:     scale = bits 7-6   index = bits 5-3   base = bits 2-0
 *   REX:     W = bit 3   R = bit 2   X = bit 1   B = bit 0
 */
#define modFromModRM(modRM)  (((modRM) >> 6) & 0x3)
#define regFromModRM(modRM)  (((modRM) >> 3) & 0x7)
#define rmFromModRM(modRM)   ((modRM) & 0x07)
#define scaleFromSIB(sib)    (((sib) >> 6) & 0x3)
#define indexFromSIB(sib)    (((sib) >> 3) & 0x7)
#define baseFromSIB(sib)     ((sib) & 0x07)
#define wFromREX(rex)        (((rex) >> 3) & 0x1)
#define rFromREX(rex)        (((rex) >> 2) & 0x1)
#define xFromREX(rex)        (((rex) >> 1) & 0x1)
#define bFromREX(rex)        ((rex) & 0x01)
47
/*
 * Field extractors for VEX prefix bytes.  The suffix names the byte being
 * examined (e.g. "2of3" -- presumably byte 2 of a 3-byte VEX prefix).
 * The r, x, b and vvvv extractors flip the extracted bits (the XOR below), so
 * callers receive the complement of what is stored in the byte; w, l, pp and
 * mmmmm are returned as stored.
 */
#define rFromVEX2of3(vex)       ((((vex) >> 7) & 0x1) ^ 0x1)
#define xFromVEX2of3(vex)       ((((vex) >> 6) & 0x1) ^ 0x1)
#define bFromVEX2of3(vex)       ((((vex) >> 5) & 0x1) ^ 0x1)
#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
#define wFromVEX3of3(vex)       (((vex) >> 7) & 0x1)
#define vvvvFromVEX3of3(vex)    ((((vex) >> 3) & 0xf) ^ 0xf)
#define lFromVEX3of3(vex)       (((vex) >> 2) & 0x1)
#define ppFromVEX3of3(vex)      ((vex) & 0x03)

#define rFromVEX2of2(vex)       ((((vex) >> 7) & 0x1) ^ 0x1)
#define vvvvFromVEX2of2(vex)    ((((vex) >> 3) & 0xf) ^ 0xf)
#define lFromVEX2of2(vex)       (((vex) >> 2) & 0x1)
#define ppFromVEX2of2(vex)      ((vex) & 0x03)
61
/*
 * Register lists for use by the decoder.  Each list is an X-macro: the user
 * defines ENTRY(x) before expanding a list and undefines it afterwards.  The
 * order of the entries determines the numeric value of whatever is generated
 * from them, so it must not be changed.
 */

/* 8-bit registers: the eight legacy ones, R8B-R15B, then SPL/BPL/SIL/DIL. */
#define REGS_8BIT                                   \
  ENTRY(AL)   ENTRY(CL)   ENTRY(DL)   ENTRY(BL)     \
  ENTRY(AH)   ENTRY(CH)   ENTRY(DH)   ENTRY(BH)     \
  ENTRY(R8B)  ENTRY(R9B)  ENTRY(R10B) ENTRY(R11B)   \
  ENTRY(R12B) ENTRY(R13B) ENTRY(R14B) ENTRY(R15B)   \
  ENTRY(SPL)  ENTRY(BPL)  ENTRY(SIL)  ENTRY(DIL)
87
/*
 * 16-bit effective-address bases.  The first eight entries are the 16-bit
 * addressing forms (register pairs and single registers); the remaining eight
 * are R8W-R15W.
 */
#define EA_BASES_16BIT                              \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI) \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX)    \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)  \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)

/* 16-bit general-purpose registers. */
#define REGS_16BIT                                  \
  ENTRY(AX)   ENTRY(CX)   ENTRY(DX)   ENTRY(BX)     \
  ENTRY(SP)   ENTRY(BP)   ENTRY(SI)   ENTRY(DI)     \
  ENTRY(R8W)  ENTRY(R9W)  ENTRY(R10W) ENTRY(R11W)   \
  ENTRY(R12W) ENTRY(R13W) ENTRY(R14W) ENTRY(R15W)
123
/*
 * 32-bit effective-address bases.  Same order as REGS_32BIT, except that the
 * fifth slot holds the `sib` placeholder where the register list has ESP.
 */
#define EA_BASES_32BIT                              \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)    \
  ENTRY(sib)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)    \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D)   \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)

/* 32-bit general-purpose registers. */
#define REGS_32BIT                                  \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)    \
  ENTRY(ESP)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)    \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D)   \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
159
/*
 * 64-bit effective-address bases.  Same order as REGS_64BIT, except that the
 * fifth slot holds the `sib64` placeholder where the register list has RSP.
 */
#define EA_BASES_64BIT                              \
  ENTRY(RAX)   ENTRY(RCX) ENTRY(RDX) ENTRY(RBX)     \
  ENTRY(sib64) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI)     \
  ENTRY(R8)    ENTRY(R9)  ENTRY(R10) ENTRY(R11)     \
  ENTRY(R12)   ENTRY(R13) ENTRY(R14) ENTRY(R15)

/* 64-bit general-purpose registers. */
#define REGS_64BIT                                  \
  ENTRY(RAX) ENTRY(RCX) ENTRY(RDX) ENTRY(RBX)       \
  ENTRY(RSP) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI)       \
  ENTRY(R8)  ENTRY(R9)  ENTRY(R10) ENTRY(R11)       \
  ENTRY(R12) ENTRY(R13) ENTRY(R14) ENTRY(R15)
195
/* MMX registers MM0-MM7. */
#define REGS_MMX                                    \
  ENTRY(MM0) ENTRY(MM1) ENTRY(MM2) ENTRY(MM3)       \
  ENTRY(MM4) ENTRY(MM5) ENTRY(MM6) ENTRY(MM7)

/* XMM registers XMM0-XMM15. */
#define REGS_XMM                                    \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3)  \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7)  \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11) \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15)

/* YMM registers YMM0-YMM15. */
#define REGS_YMM                                    \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3)  \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7)  \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11) \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15)
241
/* Segment registers, in the order ES, CS, SS, DS, FS, GS. */
#define REGS_SEGMENT                                \
  ENTRY(ES) ENTRY(CS) ENTRY(SS)                     \
  ENTRY(DS) ENTRY(FS) ENTRY(GS)

/* Debug registers DR0-DR7. */
#define REGS_DEBUG                                  \
  ENTRY(DR0) ENTRY(DR1) ENTRY(DR2) ENTRY(DR3)       \
  ENTRY(DR4) ENTRY(DR5) ENTRY(DR6) ENTRY(DR7)

/* Control registers CR0-CR8 (nine entries). */
#define REGS_CONTROL                                \
  ENTRY(CR0) ENTRY(CR1) ENTRY(CR2) ENTRY(CR3)       \
  ENTRY(CR4) ENTRY(CR5) ENTRY(CR6) ENTRY(CR7)       \
  ENTRY(CR8)
270
/* Every effective-address base: 16-bit, then 32-bit, then 64-bit. */
#define ALL_EA_BASES \
  EA_BASES_16BIT EA_BASES_32BIT EA_BASES_64BIT

/* Registers usable as a SIB base: the 32-bit set followed by the 64-bit set. */
#define ALL_SIB_BASES \
  REGS_32BIT REGS_64BIT

/*
 * Every register known to the decoder.  The sub-lists are concatenated in the
 * order shown and RIP is appended as the final entry.
 */
#define ALL_REGS                                    \
  REGS_8BIT REGS_16BIT REGS_32BIT REGS_64BIT        \
  REGS_MMX REGS_XMM REGS_YMM                        \
  REGS_SEGMENT REGS_DEBUG REGS_CONTROL              \
  ENTRY(RIP)
292
293/*
294 * EABase - All possible values of the base field for effective-address
295 *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
296 *   distinguish between bases (EA_BASE_*) and registers that just happen to be
297 *   referred to when Mod == 0b11 (EA_REG_*).
298 */
299typedef enum {
300  EA_BASE_NONE,
301#define ENTRY(x) EA_BASE_##x,
302  ALL_EA_BASES
303#undef ENTRY
304#define ENTRY(x) EA_REG_##x,
305  ALL_REGS
306#undef ENTRY
307  EA_max
308} EABase;
309
310/*
311 * SIBIndex - All possible values of the SIB index field.
312 *   Borrows entries from ALL_EA_BASES with the special case that
313 *   sib is synonymous with NONE.
314 * Vector SIB: index can be XMM or YMM.
315 */
316typedef enum {
317  SIB_INDEX_NONE,
318#define ENTRY(x) SIB_INDEX_##x,
319  ALL_EA_BASES
320  REGS_XMM
321  REGS_YMM
322#undef ENTRY
323  SIB_INDEX_max
324} SIBIndex;
325
326/*
327 * SIBBase - All possible values of the SIB base field.
328 */
329typedef enum {
330  SIB_BASE_NONE,
331#define ENTRY(x) SIB_BASE_##x,
332  ALL_SIB_BASES
333#undef ENTRY
334  SIB_BASE_max
335} SIBBase;
336
/*
 * EADisplacement - The kinds of displacement an effective-address computation
 *   can carry.
 */
typedef enum {
  EA_DISP_NONE = 0, /* no displacement */
  EA_DISP_8    = 1, /* 8-bit displacement */
  EA_DISP_16   = 2, /* 16-bit displacement */
  EA_DISP_32   = 3  /* 32-bit displacement */
} EADisplacement;
347
348/*
349 * Reg - All possible values of the reg field in the ModR/M byte.
350 */
351typedef enum {
352#define ENTRY(x) MODRM_REG_##x,
353  ALL_REGS
354#undef ENTRY
355  MODRM_REG_max
356} Reg;
357
/*
 * SegmentOverride - The segment overrides an instruction can carry.
 *   SEG_OVERRIDE_NONE is 0 and SEG_OVERRIDE_max is one past the last override.
 *   Note that the order here (CS, SS, DS, ES, FS, GS) is not the order used by
 *   the REGS_SEGMENT list.
 */
typedef enum {
  SEG_OVERRIDE_NONE = 0,
  SEG_OVERRIDE_CS   = 1,
  SEG_OVERRIDE_SS   = 2,
  SEG_OVERRIDE_DS   = 3,
  SEG_OVERRIDE_ES   = 4,
  SEG_OVERRIDE_FS   = 5,
  SEG_OVERRIDE_GS   = 6,
  SEG_OVERRIDE_max  = 7
} SegmentOverride;
371
/*
 * VEXLeadingOpcodeByte - Values of the VEX.m-mmmm field.  Each enumerator is
 *   named after the leading opcode byte(s) it stands for.
 */
typedef enum {
  VEX_LOB_0F   = 1, /* 0F */
  VEX_LOB_0F38 = 2, /* 0F 38 */
  VEX_LOB_0F3A = 3  /* 0F 3A */
} VEXLeadingOpcodeByte;
381
/*
 * VEXPrefixCode - Values of the VEX.pp field.  Each enumerator is named after
 *   the prefix byte it stands for.
 */
typedef enum {
  VEX_PREFIX_NONE = 0, /* no prefix */
  VEX_PREFIX_66   = 1, /* 66 */
  VEX_PREFIX_F3   = 2, /* F3 */
  VEX_PREFIX_F2   = 3  /* F2 */
} VEXPrefixCode;
392
/* BOOL - One-byte flag type used by the decoder state. */
typedef uint8_t BOOL;

/*
 * byteReader_t - Callback the consumer must supply so the decoder can fetch
 *   bytes.  Each call reads one byte from the instruction's address space.
 * @param arg     - Baton for the consumer's own state; the decoder only passes
 *                  it through.
 * @param byte    - Where to store the byte found at address.
 * @param address - Location, in the instruction's address space, to read.
 * @return        - 0 on success; -1 if the byte cannot be read for any reason.
 */
typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);

/*
 * dlog_t - Optional callback through which the decoder emits debugging
 *   output.
 * @param arg     - Baton for the consumer's own state; the decoder only passes
 *                  it through.
 * @param log     - The message text.  The buffer is reused once the logger
 *                  returns, so copy it if it must be kept.
 */
typedef void (*dlog_t)(void *arg, const char *log);
417
418/*
419 * The x86 internal instruction, which is produced by the decoder.
420 */
421struct InternalInstruction {
422  /* Reader interface (C) */
423  byteReader_t reader;
424  /* Opaque value passed to the reader */
425  const void* readerArg;
426  /* The address of the next byte to read via the reader */
427  uint64_t readerCursor;
428
429  /* Logger interface (C) */
430  dlog_t dlog;
431  /* Opaque value passed to the logger */
432  void* dlogArg;
433
434  /* General instruction information */
435
436  /* The mode to disassemble for (64-bit, protected, real) */
437  DisassemblerMode mode;
438  /* The start of the instruction, usable with the reader */
439  uint64_t startLocation;
440  /* The length of the instruction, in bytes */
441  size_t length;
442
443  /* Prefix state */
444
445  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
446  uint8_t prefixPresent[0x100];
447  /* contains the location (for use with the reader) of the prefix byte */
448  uint64_t prefixLocations[0x100];
449  /* The value of the VEX prefix, if present */
450  uint8_t vexPrefix[3];
451  /* The length of the VEX prefix (0 if not present) */
452  uint8_t vexSize;
453  /* The value of the REX prefix, if present */
454  uint8_t rexPrefix;
455  /* The location where a mandatory prefix would have to be (i.e., right before
456     the opcode, or right before the REX prefix if one is present) */
457  uint64_t necessaryPrefixLocation;
458  /* The segment override type */
459  SegmentOverride segmentOverride;
460
461  /* Sizes of various critical pieces of data, in bytes */
462  uint8_t registerSize;
463  uint8_t addressSize;
464  uint8_t displacementSize;
465  uint8_t immediateSize;
466
467  /* Offsets from the start of the instruction to the pieces of data, which is
468     needed to find relocation entries for adding symbolic operands */
469  uint8_t displacementOffset;
470  uint8_t immediateOffset;
471
472  /* opcode state */
473
474  /* The value of the two-byte escape prefix (usually 0x0f) */
475  uint8_t twoByteEscape;
476  /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
477  uint8_t threeByteEscape;
478  /* The last byte of the opcode, not counting any ModR/M extension */
479  uint8_t opcode;
480  /* The ModR/M byte of the instruction, if it is an opcode extension */
481  uint8_t modRMExtension;
482
483  /* decode state */
484
485  /* The type of opcode, used for indexing into the array of decode tables */
486  OpcodeType opcodeType;
487  /* The instruction ID, extracted from the decode table */
488  uint16_t instructionID;
489  /* The specifier for the instruction, from the instruction info table */
490  const struct InstructionSpecifier *spec;
491
492  /* state for additional bytes, consumed during operand decode.  Pattern:
493     consumed___ indicates that the byte was already consumed and does not
494     need to be consumed again */
495
496  /* The VEX.vvvv field, which contains a third register operand for some AVX
497     instructions */
498  Reg                           vvvv;
499
500  /* The ModR/M byte, which contains most register operands and some portion of
501     all memory operands */
502  BOOL                          consumedModRM;
503  uint8_t                       modRM;
504
505  /* The SIB byte, used for more complex 32- or 64-bit memory operands */
506  BOOL                          consumedSIB;
507  uint8_t                       sib;
508
509  /* The displacement, used for memory operands */
510  BOOL                          consumedDisplacement;
511  int32_t                       displacement;
512
513  /* Immediates.  There can be two in some cases */
514  uint8_t                       numImmediatesConsumed;
515  uint8_t                       numImmediatesTranslated;
516  uint64_t                      immediates[2];
517
518  /* A register or immediate operand encoded into the opcode */
519  BOOL                          consumedOpcodeModifier;
520  uint8_t                       opcodeModifier;
521  Reg                           opcodeRegister;
522
523  /* Portions of the ModR/M byte */
524
525  /* These fields determine the allowable values for the ModR/M fields, which
526     depend on operand and address widths */
527  EABase                        eaBaseBase;
528  EABase                        eaRegBase;
529  Reg                           regBase;
530
531  /* The Mod and R/M fields can encode a base for an effective address, or a
532     register.  These are separated into two fields here */
533  EABase                        eaBase;
534  EADisplacement                eaDisplacement;
535  /* The reg field always encodes a register */
536  Reg                           reg;
537
538  /* SIB state */
539  SIBIndex                      sibIndex;
540  uint8_t                       sibScale;
541  SIBBase                       sibBase;
542
543  const struct OperandSpecifier *operands;
544};
545
546/* decodeInstruction - Decode one instruction and store the decoding results in
547 *   a buffer provided by the consumer.
548 * @param insn      - The buffer to store the instruction in.  Allocated by the
549 *                    consumer.
550 * @param reader    - The byteReader_t for the bytes to be read.
551 * @param readerArg - An argument to pass to the reader for storing context
552 *                    specific to the consumer.  May be NULL.
553 * @param logger    - The dlog_t to be used in printing status messages from the
554 *                    disassembler.  May be NULL.
555 * @param loggerArg - An argument to pass to the logger for storing context
556 *                    specific to the logger.  May be NULL.
557 * @param startLoc  - The address (in the reader's address space) of the first
558 *                    byte in the instruction.
559 * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
560 * @return          - Nonzero if there was an error during decode, 0 otherwise.
561 */
562int decodeInstruction(struct InternalInstruction* insn,
563                      byteReader_t reader,
564                      const void* readerArg,
565                      dlog_t logger,
566                      void* loggerArg,
567                      const void* miiArg,
568                      uint64_t startLoc,
569                      DisassemblerMode mode);
570
/* x86DisassemblerDebug - C-callable hook that prints a message to debugs().
 * @param file  - Name of the source file issuing the message.
 * @param line  - Line number issuing the message.
 * @param s     - The message text.
 */
void x86DisassemblerDebug(const char *file, unsigned line, const char *s);

/* x86DisassemblerGetInstrName - Look up a name string for an opcode.
 * @param Opcode - The opcode to look up.
 * @param mii    - Opaque pointer supplied by the caller.
 * @return       - A string for Opcode.
 * NOTE(review): this declaration was undocumented; the description above is
 *   inferred from the signature -- confirm details (meaning of mii, lifetime
 *   of the returned string) against the implementation.
 */
const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
583
584#ifdef __cplusplus
585}
586#endif
587
588#endif
589