1//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is part of the X86 Disassembler.
10// It contains the public interface of the instruction decoder.
11// Documentation for the disassembler can be found in X86Disassembler.h.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
16#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/Support/X86DisassemblerDecoderCommon.h"
20
21namespace llvm {
22namespace X86Disassembler {
23
// Field extractors for the ModR/M, SIB and REX bytes of an Intel instruction.
// Each macro masks one bit-field out of its argument and shifts it down so
// that the field's lowest bit ends up in bit 0.

// ModR/M byte: mod = bits 7:6, reg = bits 5:3, r/m = bits 2:0.
#define modFromModRM(byte)   (((byte) & 0xc0) >> 6)
#define regFromModRM(byte)   (((byte) & 0x38) >> 3)
#define rmFromModRM(byte)    ((byte) & 0x7)

// SIB byte: scale = bits 7:6, index = bits 5:3, base = bits 2:0.
#define scaleFromSIB(byte)   (((byte) & 0xc0) >> 6)
#define indexFromSIB(byte)   (((byte) & 0x38) >> 3)
#define baseFromSIB(byte)    ((byte) & 0x7)

// REX prefix: W = bit 3, R = bit 2, X = bit 1, B = bit 0.
#define wFromREX(prefix)     (((prefix) & 0x8) >> 3)
#define rFromREX(prefix)     (((prefix) & 0x4) >> 2)
#define xFromREX(prefix)     (((prefix) & 0x2) >> 1)
#define bFromREX(prefix)     ((prefix) & 0x1)
35
// Field extractors for the EVEX prefix.  The "NofM" suffix names which of the
// four prefix bytes the argument must be.  Extractors that apply ~ to their
// argument return the complement of the bit(s) as stored in the byte.
// Every use of the argument is parenthesized so that compound expressions
// (e.g. a | b) are complemented/masked as a whole.

// Second EVEX byte: complemented R (bit 7), X (bit 6), B (bit 5), R' (bit 4),
// and the map-select bits mmm (bits 2:0).
#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4)
#define mmmFromEVEX2of4(evex)   ((evex) & 0x7)

// Third EVEX byte: W (bit 7), complemented vvvv (bits 6:3), pp (bits 1:0).
#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex)    ((evex) & 0x3)

// Fourth EVEX byte: z (bit 7), L' (bit 6), L (bit 5), b (bit 4),
// complemented V' (bit 3), aaa (bits 2:0).
#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex)    (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex)   ((evex) & 0x7)
50
// Field extractors for the three-byte VEX prefix.  Extractors that apply ~
// return the complement of the bit(s) as stored in the byte.

// Second byte: complemented R (bit 7), X (bit 6), B (bit 5); mmmmm (bits 4:0).
#define rFromVEX2of3(byte)      (((~(byte)) & 0x80) >> 7)
#define xFromVEX2of3(byte)      (((~(byte)) & 0x40) >> 6)
#define bFromVEX2of3(byte)      (((~(byte)) & 0x20) >> 5)
#define mmmmmFromVEX2of3(byte)  ((byte) & 0x1f)

// Third byte: W (bit 7), complemented vvvv (bits 6:3), L (bit 2), pp (bits 1:0).
#define wFromVEX3of3(byte)      (((byte) & 0x80) >> 7)
#define vvvvFromVEX3of3(byte)   (((~(byte)) & 0x78) >> 3)
#define lFromVEX3of3(byte)      (((byte) & 0x4) >> 2)
#define ppFromVEX3of3(byte)     ((byte) & 0x3)
59
// Field extractors for the second byte of the two-byte VEX prefix:
// complemented R (bit 7), complemented vvvv (bits 6:3), L (bit 2),
// pp (bits 1:0).
#define rFromVEX2of2(byte)      (((~(byte)) & 0x80) >> 7)
#define vvvvFromVEX2of2(byte)   (((~(byte)) & 0x78) >> 3)
#define lFromVEX2of2(byte)      (((byte) & 0x4) >> 2)
#define ppFromVEX2of2(byte)     ((byte) & 0x3)
64
// Field extractors for the three-byte XOP prefix.  Extractors that apply ~
// return the complement of the bit(s) as stored in the byte.

// Second byte: complemented R (bit 7), X (bit 6), B (bit 5); mmmmm (bits 4:0).
#define rFromXOP2of3(byte)      (((~(byte)) & 0x80) >> 7)
#define xFromXOP2of3(byte)      (((~(byte)) & 0x40) >> 6)
#define bFromXOP2of3(byte)      (((~(byte)) & 0x20) >> 5)
#define mmmmmFromXOP2of3(byte)  ((byte) & 0x1f)

// Third byte: W (bit 7), complemented vvvv (bits 6:3), L (bit 2), pp (bits 1:0).
#define wFromXOP3of3(byte)      (((byte) & 0x80) >> 7)
#define vvvvFromXOP3of3(byte)   (((~(byte)) & 0x78) >> 3)
#define lFromXOP3of3(byte)      (((byte) & 0x4) >> 2)
#define ppFromXOP3of3(byte)     ((byte) & 0x3)
73
// The lists below name the Intel registers the decoder works with.  Each list
// is an X-macro: it expands ENTRY(name) once per register, in order, so the
// includer decides what an entry turns into (here: enumerators).

// 8-bit registers.
#define REGS_8BIT                                     \
  ENTRY(AL)   ENTRY(CL)   ENTRY(DL)   ENTRY(BL)       \
  ENTRY(AH)   ENTRY(CH)   ENTRY(DH)   ENTRY(BH)       \
  ENTRY(R8B)  ENTRY(R9B)  ENTRY(R10B) ENTRY(R11B)     \
  ENTRY(R12B) ENTRY(R13B) ENTRY(R14B) ENTRY(R15B)     \
  ENTRY(SPL)  ENTRY(BPL)  ENTRY(SIL)  ENTRY(DIL)
96
// Effective-address bases for 16-bit addressing (X-macro list; expands
// ENTRY(name) once per base, in order).
#define EA_BASES_16BIT                                \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI) \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX)    \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)  \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)
114
// 16-bit registers (X-macro list; expands ENTRY(name) once per register).
#define REGS_16BIT                                    \
  ENTRY(AX)   ENTRY(CX)   ENTRY(DX)   ENTRY(BX)       \
  ENTRY(SP)   ENTRY(BP)   ENTRY(SI)   ENTRY(DI)       \
  ENTRY(R8W)  ENTRY(R9W)  ENTRY(R10W) ENTRY(R11W)     \
  ENTRY(R12W) ENTRY(R13W) ENTRY(R14W) ENTRY(R15W)
132
// Effective-address bases for 32-bit addressing (X-macro list).  The fifth
// slot is the placeholder entry "sib" rather than a register name.
#define EA_BASES_32BIT                                \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)      \
  ENTRY(sib)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)      \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D)     \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
150
// 32-bit registers (X-macro list; expands ENTRY(name) once per register).
#define REGS_32BIT                                    \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)      \
  ENTRY(ESP)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)      \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D)     \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
168
// Effective-address bases for 64-bit addressing (X-macro list).  The fifth
// slot is the placeholder entry "sib64" rather than a register name.
#define EA_BASES_64BIT                                \
  ENTRY(RAX)   ENTRY(RCX) ENTRY(RDX) ENTRY(RBX)       \
  ENTRY(sib64) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI)       \
  ENTRY(R8)    ENTRY(R9)  ENTRY(R10) ENTRY(R11)       \
  ENTRY(R12)   ENTRY(R13) ENTRY(R14) ENTRY(R15)
186
// 64-bit registers (X-macro list; expands ENTRY(name) once per register).
#define REGS_64BIT                                    \
  ENTRY(RAX) ENTRY(RCX) ENTRY(RDX) ENTRY(RBX)         \
  ENTRY(RSP) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI)         \
  ENTRY(R8)  ENTRY(R9)  ENTRY(R10) ENTRY(R11)         \
  ENTRY(R12) ENTRY(R13) ENTRY(R14) ENTRY(R15)
204
// MMX registers MM0..MM7 (X-macro list).
#define REGS_MMX                                      \
  ENTRY(MM0) ENTRY(MM1) ENTRY(MM2) ENTRY(MM3)         \
  ENTRY(MM4) ENTRY(MM5) ENTRY(MM6) ENTRY(MM7)
214
// XMM registers XMM0..XMM31 (X-macro list).
#define REGS_XMM                                      \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3)  \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7)  \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11) \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15) \
  ENTRY(XMM16) ENTRY(XMM17) ENTRY(XMM18) ENTRY(XMM19) \
  ENTRY(XMM20) ENTRY(XMM21) ENTRY(XMM22) ENTRY(XMM23) \
  ENTRY(XMM24) ENTRY(XMM25) ENTRY(XMM26) ENTRY(XMM27) \
  ENTRY(XMM28) ENTRY(XMM29) ENTRY(XMM30) ENTRY(XMM31)
248
// YMM registers YMM0..YMM31 (X-macro list).
#define REGS_YMM                                      \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3)  \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7)  \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11) \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15) \
  ENTRY(YMM16) ENTRY(YMM17) ENTRY(YMM18) ENTRY(YMM19) \
  ENTRY(YMM20) ENTRY(YMM21) ENTRY(YMM22) ENTRY(YMM23) \
  ENTRY(YMM24) ENTRY(YMM25) ENTRY(YMM26) ENTRY(YMM27) \
  ENTRY(YMM28) ENTRY(YMM29) ENTRY(YMM30) ENTRY(YMM31)
282
// ZMM registers ZMM0..ZMM31 (X-macro list).
#define REGS_ZMM                                      \
  ENTRY(ZMM0)  ENTRY(ZMM1)  ENTRY(ZMM2)  ENTRY(ZMM3)  \
  ENTRY(ZMM4)  ENTRY(ZMM5)  ENTRY(ZMM6)  ENTRY(ZMM7)  \
  ENTRY(ZMM8)  ENTRY(ZMM9)  ENTRY(ZMM10) ENTRY(ZMM11) \
  ENTRY(ZMM12) ENTRY(ZMM13) ENTRY(ZMM14) ENTRY(ZMM15) \
  ENTRY(ZMM16) ENTRY(ZMM17) ENTRY(ZMM18) ENTRY(ZMM19) \
  ENTRY(ZMM20) ENTRY(ZMM21) ENTRY(ZMM22) ENTRY(ZMM23) \
  ENTRY(ZMM24) ENTRY(ZMM25) ENTRY(ZMM26) ENTRY(ZMM27) \
  ENTRY(ZMM28) ENTRY(ZMM29) ENTRY(ZMM30) ENTRY(ZMM31)
316
// Mask registers K0..K7 (X-macro list).
#define REGS_MASKS                                    \
  ENTRY(K0) ENTRY(K1) ENTRY(K2) ENTRY(K3)             \
  ENTRY(K4) ENTRY(K5) ENTRY(K6) ENTRY(K7)
326
// Pairs of adjacent mask registers (X-macro list).
#define REGS_MASK_PAIRS                               \
  ENTRY(K0_K1) ENTRY(K2_K3) ENTRY(K4_K5) ENTRY(K6_K7)
332
// Segment registers (X-macro list).
#define REGS_SEGMENT                                  \
  ENTRY(ES) ENTRY(CS) ENTRY(SS)                       \
  ENTRY(DS) ENTRY(FS) ENTRY(GS)
340
// Debug registers DR0..DR15 (X-macro list).
#define REGS_DEBUG                                    \
  ENTRY(DR0)  ENTRY(DR1)  ENTRY(DR2)  ENTRY(DR3)      \
  ENTRY(DR4)  ENTRY(DR5)  ENTRY(DR6)  ENTRY(DR7)      \
  ENTRY(DR8)  ENTRY(DR9)  ENTRY(DR10) ENTRY(DR11)     \
  ENTRY(DR12) ENTRY(DR13) ENTRY(DR14) ENTRY(DR15)
358
// Control registers CR0..CR15 (X-macro list).
#define REGS_CONTROL                                  \
  ENTRY(CR0)  ENTRY(CR1)  ENTRY(CR2)  ENTRY(CR3)      \
  ENTRY(CR4)  ENTRY(CR5)  ENTRY(CR6)  ENTRY(CR7)      \
  ENTRY(CR8)  ENTRY(CR9)  ENTRY(CR10) ENTRY(CR11)     \
  ENTRY(CR12) ENTRY(CR13) ENTRY(CR14) ENTRY(CR15)
376
// Tile registers TMM0..TMM7 (X-macro list).  Any earlier definition of
// REGS_TMM is dropped first so this one takes effect without a redefinition.
#undef REGS_TMM
#define REGS_TMM                                      \
  ENTRY(TMM0) ENTRY(TMM1) ENTRY(TMM2) ENTRY(TMM3)     \
  ENTRY(TMM4) ENTRY(TMM5) ENTRY(TMM6) ENTRY(TMM7)
387
// Every effective-address base: the 16-, 32- and 64-bit lists, in that order.
#define ALL_EA_BASES EA_BASES_16BIT EA_BASES_32BIT EA_BASES_64BIT
392
// Every SIB base: the 32-bit registers followed by the 64-bit registers.
#define ALL_SIB_BASES REGS_32BIT REGS_64BIT
396
// Every register known to the decoder: all the per-class lists in a fixed
// order, followed by RIP as a single extra entry.
#define ALL_REGS                                      \
  REGS_8BIT REGS_16BIT REGS_32BIT REGS_64BIT          \
  REGS_MMX REGS_XMM REGS_YMM REGS_ZMM                 \
  REGS_MASKS REGS_MASK_PAIRS                          \
  REGS_SEGMENT REGS_DEBUG REGS_CONTROL                \
  REGS_TMM                                            \
  ENTRY(RIP)
413
414/// All possible values of the base field for effective-address
415/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
416/// We distinguish between bases (EA_BASE_*) and registers that just happen
417/// to be referred to when Mod == 0b11 (EA_REG_*).
418enum EABase {
419  EA_BASE_NONE,
420#define ENTRY(x) EA_BASE_##x,
421  ALL_EA_BASES
422#undef ENTRY
423#define ENTRY(x) EA_REG_##x,
424  ALL_REGS
425#undef ENTRY
426  EA_max
427};
428
429/// All possible values of the SIB index field.
430/// borrows entries from ALL_EA_BASES with the special case that
431/// sib is synonymous with NONE.
432/// Vector SIB: index can be XMM or YMM.
433enum SIBIndex {
434  SIB_INDEX_NONE,
435#define ENTRY(x) SIB_INDEX_##x,
436  ALL_EA_BASES
437  REGS_XMM
438  REGS_YMM
439  REGS_ZMM
440#undef ENTRY
441  SIB_INDEX_max
442};
443
444/// All possible values of the SIB base field.
445enum SIBBase {
446  SIB_BASE_NONE,
447#define ENTRY(x) SIB_BASE_##x,
448  ALL_SIB_BASES
449#undef ENTRY
450  SIB_BASE_max
451};
452
/// The displacement kinds an effective-address computation can use: none, or
/// one of the three widths named by the enumerator suffixes.
enum EADisplacement {
  EA_DISP_NONE = 0,
  EA_DISP_8 = 1,
  EA_DISP_16 = 2,
  EA_DISP_32 = 3
};
460
461/// All possible values of the reg field in the ModR/M byte.
462enum Reg {
463#define ENTRY(x) MODRM_REG_##x,
464  ALL_REGS
465#undef ENTRY
466  MODRM_REG_max
467};
468
/// The segment overrides an instruction can carry; SEG_OVERRIDE_max is the
/// count sentinel that follows the last real value.
enum SegmentOverride {
  SEG_OVERRIDE_NONE = 0,
  SEG_OVERRIDE_CS = 1,
  SEG_OVERRIDE_SS = 2,
  SEG_OVERRIDE_DS = 3,
  SEG_OVERRIDE_ES = 4,
  SEG_OVERRIDE_FS = 5,
  SEG_OVERRIDE_GS = 6,
  SEG_OVERRIDE_max = 7
};
480
/// Values the VEX.m-mmmm field can hold.  Note the gap: no enumerator is
/// defined for the value 4.
enum VEXLeadingOpcodeByte {
  VEX_LOB_0F   = 1,
  VEX_LOB_0F38 = 2,
  VEX_LOB_0F3A = 3,
  VEX_LOB_MAP5 = 5,
  VEX_LOB_MAP6 = 6
};
489
/// Map-select values for XOP-encoded instructions; each enumerator equals the
/// hexadecimal digit in its name (8, 9, A).
enum XOPMapSelect {
  XOP_MAP_SELECT_8 = 8,
  XOP_MAP_SELECT_9 = 9,
  XOP_MAP_SELECT_A = 10
};
495
/// Values the two-bit VEX.pp / EVEX.pp field can hold.
enum VEXPrefixCode {
  VEX_PREFIX_NONE = 0,
  VEX_PREFIX_66 = 1,
  VEX_PREFIX_F3 = 2,
  VEX_PREFIX_F2 = 3
};
503
/// Kind of vector extension prefix attached to an instruction: none, one of
/// the two VEX forms, EVEX, or XOP.
enum VectorExtensionType {
  TYPE_NO_VEX_XOP = 0,
  TYPE_VEX_2B = 1,
  TYPE_VEX_3B = 2,
  TYPE_EVEX = 3,
  TYPE_XOP = 4
};
511
/// Describes how a full instruction and its operands are to be extracted and
/// interpreted.
struct InstructionSpecifier {
  // 16-bit value identifying the instruction's operands; its exact meaning is
  // defined by the code that consumes it, not by this header.
  uint16_t operands;
};
517
518/// The x86 internal instruction, which is produced by the decoder.
519struct InternalInstruction {
520  // Opaque value passed to the reader
521  llvm::ArrayRef<uint8_t> bytes;
522  // The address of the next byte to read via the reader
523  uint64_t readerCursor;
524
525  // General instruction information
526
527  // The mode to disassemble for (64-bit, protected, real)
528  DisassemblerMode mode;
529  // The start of the instruction, usable with the reader
530  uint64_t startLocation;
531  // The length of the instruction, in bytes
532  size_t length;
533
534  // Prefix state
535
536  // The possible mandatory prefix
537  uint8_t mandatoryPrefix;
538  // The value of the vector extension prefix(EVEX/VEX/XOP), if present
539  uint8_t vectorExtensionPrefix[4];
540  // The type of the vector extension prefix
541  VectorExtensionType vectorExtensionType;
542  // The value of the REX prefix, if present
543  uint8_t rexPrefix;
544  // The segment override type
545  SegmentOverride segmentOverride;
546  // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
547  bool xAcquireRelease;
548
549  // Address-size override
550  bool hasAdSize;
551  // Operand-size override
552  bool hasOpSize;
553  // Lock prefix
554  bool hasLockPrefix;
555  // The repeat prefix if any
556  uint8_t repeatPrefix;
557
558  // Sizes of various critical pieces of data, in bytes
559  uint8_t registerSize;
560  uint8_t addressSize;
561  uint8_t displacementSize;
562  uint8_t immediateSize;
563
564  // Offsets from the start of the instruction to the pieces of data, which is
565  // needed to find relocation entries for adding symbolic operands.
566  uint8_t displacementOffset;
567  uint8_t immediateOffset;
568
569  // opcode state
570
571  // The last byte of the opcode, not counting any ModR/M extension
572  uint8_t opcode;
573
574  // decode state
575
576  // The type of opcode, used for indexing into the array of decode tables
577  OpcodeType opcodeType;
578  // The instruction ID, extracted from the decode table
579  uint16_t instructionID;
580  // The specifier for the instruction, from the instruction info table
581  const InstructionSpecifier *spec;
582
583  // state for additional bytes, consumed during operand decode.  Pattern:
584  // consumed___ indicates that the byte was already consumed and does not
585  // need to be consumed again.
586
587  // The VEX.vvvv field, which contains a third register operand for some AVX
588  // instructions.
589  Reg                           vvvv;
590
591  // The writemask for AVX-512 instructions which is contained in EVEX.aaa
592  Reg                           writemask;
593
594  // The ModR/M byte, which contains most register operands and some portion of
595  // all memory operands.
596  bool                          consumedModRM;
597  uint8_t                       modRM;
598
599  // The SIB byte, used for more complex 32- or 64-bit memory operands
600  uint8_t                       sib;
601
602  // The displacement, used for memory operands
603  int32_t                       displacement;
604
605  // Immediates.  There can be two in some cases
606  uint8_t                       numImmediatesConsumed;
607  uint8_t                       numImmediatesTranslated;
608  uint64_t                      immediates[2];
609
610  // A register or immediate operand encoded into the opcode
611  Reg                           opcodeRegister;
612
613  // Portions of the ModR/M byte
614
615  // These fields determine the allowable values for the ModR/M fields, which
616  // depend on operand and address widths.
617  EABase                        eaRegBase;
618  Reg                           regBase;
619
620  // The Mod and R/M fields can encode a base for an effective address, or a
621  // register.  These are separated into two fields here.
622  EABase                        eaBase;
623  EADisplacement                eaDisplacement;
624  // The reg field always encodes a register
625  Reg                           reg;
626
627  // SIB state
628  SIBIndex                      sibIndexBase;
629  SIBIndex                      sibIndex;
630  uint8_t                       sibScale;
631  SIBBase                       sibBase;
632
633  // Embedded rounding control.
634  uint8_t                       RC;
635
636  ArrayRef<OperandSpecifier> operands;
637};
638
639} // namespace X86Disassembler
640} // namespace llvm
641
642#endif
643