//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets.  The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is.  The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode, to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.).  The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read.  For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance).  Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself.  The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.h contains the public interface for the disassembler,
//   adhering to the MCDisassembler interface.
// X86Disassembler.cpp contains the code responsible for step 7, and for
//   invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
//   table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
//   factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
//   responsible for steps 1-6.
71201360Srdivacky// 72201360Srdivacky//===----------------------------------------------------------------------===// 73201360Srdivacky 74201360Srdivacky#ifndef X86DISASSEMBLER_H 75201360Srdivacky#define X86DISASSEMBLER_H 76201360Srdivacky 77239462Sdim#define INSTRUCTION_SPECIFIER_FIELDS \ 78239462Sdim uint16_t operands; 79201360Srdivacky 80201360Srdivacky#define INSTRUCTION_IDS \ 81243830Sdim uint16_t instructionIDs; 82201360Srdivacky 83201360Srdivacky#include "X86DisassemblerDecoderCommon.h" 84201360Srdivacky 85201360Srdivacky#undef INSTRUCTION_SPECIFIER_FIELDS 86201360Srdivacky#undef INSTRUCTION_IDS 87201360Srdivacky 88201360Srdivacky#include "llvm/MC/MCDisassembler.h" 89201360Srdivacky 90201360Srdivackynamespace llvm { 91239462Sdim 92201360Srdivackyclass MCInst; 93234353Sdimclass MCInstrInfo; 94226633Sdimclass MCSubtargetInfo; 95201360Srdivackyclass MemoryObject; 96201360Srdivackyclass raw_ostream; 97207618Srdivacky 98201360Srdivackynamespace X86Disassembler { 99201360Srdivacky 100201360Srdivacky/// X86GenericDisassembler - Generic disassembler for all X86 platforms. 101201360Srdivacky/// All each platform class should have to do is subclass the constructor, and 102201360Srdivacky/// provide a different disassemblerMode value. 103201360Srdivackyclass X86GenericDisassembler : public MCDisassembler { 104234353Sdim const MCInstrInfo *MII; 105234353Sdimpublic: 106201360Srdivacky /// Constructor - Initializes the disassembler. 107201360Srdivacky /// 108201360Srdivacky /// @param mode - The X86 architecture mode to decode for. 109234353Sdim X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode, 110234353Sdim const MCInstrInfo *MII); 111234353Sdimprivate: 112234353Sdim ~X86GenericDisassembler(); 113201360Srdivackypublic: 114201360Srdivacky 115201360Srdivacky /// getInstruction - See MCDisassembler. 
116226633Sdim DecodeStatus getInstruction(MCInst &instr, 117226633Sdim uint64_t &size, 118226633Sdim const MemoryObject ®ion, 119226633Sdim uint64_t address, 120226633Sdim raw_ostream &vStream, 121226633Sdim raw_ostream &cStream) const; 122207618Srdivacky 123201360Srdivackyprivate: 124201360Srdivacky DisassemblerMode fMode; 125201360Srdivacky}; 126201360Srdivacky 127234353Sdim} // namespace X86Disassembler 128201360Srdivacky 129234353Sdim} // namespace llvm 130201360Srdivacky 131201360Srdivacky#endif 132