X86Disassembler.h revision 249423
1201360Srdivacky//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
2201360Srdivacky//
3201360Srdivacky//                     The LLVM Compiler Infrastructure
4201360Srdivacky//
5201360Srdivacky// This file is distributed under the University of Illinois Open Source
6201360Srdivacky// License. See LICENSE.TXT for details.
7201360Srdivacky//
8201360Srdivacky//===----------------------------------------------------------------------===//
9201360Srdivacky//
10201360Srdivacky// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
11201360Srdivacky// 64-bit X86 instruction sets.  The main decode sequence for an assembly
12201360Srdivacky// instruction in this disassembler is:
13201360Srdivacky//
14201360Srdivacky// 1. Read the prefix bytes and determine the attributes of the instruction.
15201360Srdivacky//    These attributes, recorded in enum attributeBits
16201360Srdivacky//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
17201360Srdivacky//    provides a mapping from bitmasks to contexts, which are represented by
18201360Srdivacky//    enum InstructionContext (ibid.).
19201360Srdivacky//
20201360Srdivacky// 2. Read the opcode, and determine what kind of opcode it is.  The
21201360Srdivacky//    disassembler distinguishes four kinds of opcodes, which are enumerated in
22201360Srdivacky//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
23201360Srdivacky//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
24201360Srdivacky//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
25201360Srdivacky//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode to
//    get a ModRMDecision (ibid.).
30201360Srdivacky//
31201360Srdivacky// 4. Some instructions, such as escape opcodes or extended opcodes, or even
32201360Srdivacky//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
33201360Srdivacky//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
34201360Srdivacky//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
35201360Srdivacky//    ModR/M byte is required and how to interpret it.
36201360Srdivacky//
37201360Srdivacky// 5. After resolving the ModRMDecision, the disassembler has a unique ID
38201360Srdivacky//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
39201360Srdivacky//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
40201360Srdivacky//    meanings of its operands.
41201360Srdivacky//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.).  The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read.  For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance).  Given this information, the operands can be
//    extracted and interpreted.
51201360Srdivacky//
52201360Srdivacky// 7. As the last step, the disassembler translates the instruction information
53201360Srdivacky//    and operands into a format understandable by the client - in this case, an
54201360Srdivacky//    MCInst for use by the MC infrastructure.
55201360Srdivacky//
56201360Srdivacky// The disassembler is broken broadly into two parts: the table emitter that
57201360Srdivacky// emits the instruction decode tables discussed above during compilation, and
58201360Srdivacky// the disassembler itself.  The table emitter is documented in more detail in
59201360Srdivacky// utils/TableGen/X86DisassemblerEmitter.h.
60201360Srdivacky//
61201360Srdivacky// X86Disassembler.h contains the public interface for the disassembler,
62201360Srdivacky//   adhering to the MCDisassembler interface.
63201360Srdivacky// X86Disassembler.cpp contains the code responsible for step 7, and for
64201360Srdivacky//   invoking the decoder to execute steps 1-6.
65201360Srdivacky// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
66201360Srdivacky//   table emitter and the disassembler.
67201360Srdivacky// X86DisassemblerDecoder.h contains the public interface of the decoder,
68201360Srdivacky//   factored out into C for possible use by other projects.
69201360Srdivacky// X86DisassemblerDecoder.c contains the source code of the decoder, which is
70201360Srdivacky//   responsible for steps 1-6.
71201360Srdivacky//
72201360Srdivacky//===----------------------------------------------------------------------===//
73201360Srdivacky
74201360Srdivacky#ifndef X86DISASSEMBLER_H
75201360Srdivacky#define X86DISASSEMBLER_H
76201360Srdivacky
// Member-declaration snippets defined ahead of the
// X86DisassemblerDecoderCommon.h include that follows, and #undef'd again
// right after it.
// NOTE(review): presumably that header expands them inside its structure
// definitions - confirm against X86DisassemblerDecoderCommon.h.

// Expands to the declaration of a 16-bit field named `operands`.
#define INSTRUCTION_SPECIFIER_FIELDS \
  uint16_t operands;

// Expands to the declaration of a 16-bit field named `instructionIDs`.
#define INSTRUCTION_IDS \
  uint16_t instructionIDs;
82201360Srdivacky
83201360Srdivacky#include "X86DisassemblerDecoderCommon.h"
84201360Srdivacky
85201360Srdivacky#undef INSTRUCTION_SPECIFIER_FIELDS
86201360Srdivacky#undef INSTRUCTION_IDS
87201360Srdivacky
88201360Srdivacky#include "llvm/MC/MCDisassembler.h"
89201360Srdivacky
90201360Srdivackynamespace llvm {
91201360Srdivacky
92201360Srdivackyclass MCInst;
93201360Srdivackyclass MCInstrInfo;
94201360Srdivackyclass MCSubtargetInfo;
95201360Srdivackyclass MemoryObject;
96201360Srdivackyclass raw_ostream;
97201360Srdivacky
98201360Srdivackynamespace X86Disassembler {
99201360Srdivacky
100201360Srdivacky/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
101201360Srdivacky///   All each platform class should have to do is subclass the constructor, and
102201360Srdivacky///   provide a different disassemblerMode value.
103201360Srdivackyclass X86GenericDisassembler : public MCDisassembler {
104201360Srdivacky  const MCInstrInfo *MII;
105201360Srdivackypublic:
106201360Srdivacky  /// Constructor     - Initializes the disassembler.
107201360Srdivacky  ///
108201360Srdivacky  /// @param mode     - The X86 architecture mode to decode for.
109201360Srdivacky  X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode,
110201360Srdivacky                         const MCInstrInfo *MII);
111201360Srdivackyprivate:
112201360Srdivacky  ~X86GenericDisassembler();
113201360Srdivackypublic:
114201360Srdivacky
115201360Srdivacky  /// getInstruction - See MCDisassembler.
116201360Srdivacky  DecodeStatus getInstruction(MCInst &instr,
117201360Srdivacky                              uint64_t &size,
118201360Srdivacky                              const MemoryObject &region,
119201360Srdivacky                              uint64_t address,
120201360Srdivacky                              raw_ostream &vStream,
121201360Srdivacky                              raw_ostream &cStream) const;
122201360Srdivacky
123201360Srdivackyprivate:
124201360Srdivacky  DisassemblerMode              fMode;
125201360Srdivacky};
126201360Srdivacky
127201360Srdivacky} // namespace X86Disassembler
128201360Srdivacky
129201360Srdivacky} // namespace llvm
130201360Srdivacky
131201360Srdivacky#endif
132201360Srdivacky