X86DisassemblerTables.h revision 226633
1//===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler Emitter.
11// It contains the interface of the disassembler tables.
12// Documentation for the disassembler emitter in general can be found in
//  X86DisassemblerEmitter.h.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef X86DISASSEMBLERTABLES_H
18#define X86DISASSEMBLERTABLES_H
19
20#include "X86DisassemblerShared.h"
21#include "X86ModRMFilters.h"
22
23#include "llvm/Support/raw_ostream.h"
24
25#include <vector>
26
27namespace llvm {
28
29namespace X86Disassembler {
30
31/// DisassemblerTables - Encapsulates all the decode tables being generated by
32///   the table emitter.  Contains functions to populate the tables as well as
33///   to emit them as hierarchical C structures suitable for consumption by the
34///   runtime.
35class DisassemblerTables {
36private:
37  /// The decoder tables.  There is one for each opcode type:
38  /// [0] one-byte opcodes
39  /// [1] two-byte opcodes of the form 0f __
40  /// [2] three-byte opcodes of the form 0f 38 __
41  /// [3] three-byte opcodes of the form 0f 3a __
42  /// [4] three-byte opcodes of the form 0f a6 __
43  /// [5] three-byte opcodes of the form 0f a7 __
44  ContextDecision* Tables[6];
45
46  /// The instruction information table
47  std::vector<InstructionSpecifier> InstructionSpecifiers;
48
49  /// True if there are primary decode conflicts in the instruction set
50  bool HasConflicts;
51
52  /// emitOneID - Emits a table entry for a single instruction entry, at the
53  ///   innermost level of the structure hierarchy.  The entry is printed out
54  ///   in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal,
55  ///   the comma is printed if addComma is true, and the menonic is the name
56  ///   of the instruction as listed in the LLVM tables.
57  ///
58  /// @param o        - The output stream to print the entry on.
59  /// @param i        - The indentation level for o.
60  /// @param id       - The unique ID of the instruction to print.
61  /// @param addComma - Whether or not to print a comma after the ID.  True if
62  ///                    additional items will follow.
63  void emitOneID(raw_ostream &o,
64                 uint32_t &i,
65                 InstrUID id,
66                 bool addComma) const;
67
68  /// emitModRMDecision - Emits a table of entries corresponding to a single
69  ///   ModR/M decision.  Compacts the ModR/M decision if possible.  ModR/M
70  ///   decisions are printed as:
71  ///
72  ///   { /* struct ModRMDecision */
73  ///     TYPE,
74  ///     modRMTablennnn
75  ///   }
76  ///
77  ///   where nnnn is a unique ID for the corresponding table of IDs.
78  ///   TYPE indicates whether the table has one entry that is the same
79  ///   regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
80  ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.
81  ///   nnnn is the number of a table for looking up these values.  The tables
82  ///   are written separately so that tables consisting entirely of zeros will
83  ///   not be duplicated.  (These all have the name modRMEmptyTable.)  A table
84  ///   is printed as:
85  ///
86  ///   InstrUID modRMTablennnn[k] = {
87  ///     nnnn, /* MNEMONIC */
88  ///     ...
89  ///     nnnn /* MNEMONIC */
90  ///   };
91  ///
92  /// @param o1       - The output stream to print the ID table to.
93  /// @param o2       - The output stream to print the decision structure to.
94  /// @param i1       - The indentation level to use with stream o1.
95  /// @param i2       - The indentation level to use with stream o2.
96  /// @param decision - The ModR/M decision to emit.  This decision has 256
97  ///                   entries - emitModRMDecision decides how to compact it.
98  void emitModRMDecision(raw_ostream &o1,
99                         raw_ostream &o2,
100                         uint32_t &i1,
101                         uint32_t &i2,
102                         ModRMDecision &decision) const;
103
104  /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M
105  ///   decisions.  An OpcodeDecision is printed as:
106  ///
107  ///   { /* struct OpcodeDecision */
108  ///     /* 0x00 */
109  ///     { /* struct ModRMDecision */
110  ///       ...
111  ///     }
112  ///     ...
113  ///   }
114  ///
115  ///   where the ModRMDecision structure is printed as described in the
116  ///   documentation for emitModRMDecision().  emitOpcodeDecision() passes on a
117  ///   stream and indent level for the UID tables generated by
118  ///   emitModRMDecision(), but does not use them itself.
119  ///
120  /// @param o1       - The output stream to print the ID tables generated by
121  ///                   emitModRMDecision() to.
122  /// @param o2       - The output stream for the decision structure itself.
123  /// @param i1       - The indent level to use with stream o1.
124  /// @param i2       - The indent level to use with stream o2.
125  /// @param decision - The OpcodeDecision to emit along with its subsidiary
126  ///                    structures.
127  void emitOpcodeDecision(raw_ostream &o1,
128                          raw_ostream &o2,
129                          uint32_t &i1,
130                          uint32_t &i2,
131                          OpcodeDecision &decision) const;
132
133  /// emitContextDecision - Emits a ContextDecision and all its subsidiary
134  ///   Opcode and ModRMDecisions.  A ContextDecision is printed as:
135  ///
136  ///   struct ContextDecision NAME = {
137  ///     { /* OpcodeDecisions */
138  ///       /* IC */
139  ///       { /* struct OpcodeDecision */
140  ///         ...
141  ///       },
142  ///       ...
143  ///     }
144  ///   }
145  ///
146  ///   NAME is the name of the ContextDecision (typically one of the four names
147  ///   ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
148  ///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM from
149  ///   X86DisassemblerDecoderCommon.h).
150  ///   IC is one of the contexts in InstructionContext.  There is an opcode
151  ///   decision for each possible context.
152  ///   The OpcodeDecision structures are printed as described in the
153  ///   documentation for emitOpcodeDecision.
154  ///
155  /// @param o1       - The output stream to print the ID tables generated by
156  ///                   emitModRMDecision() to.
157  /// @param o2       - The output stream to print the decision structure to.
158  /// @param i1       - The indent level to use with stream o1.
159  /// @param i2       - The indent level to use with stream o2.
160  /// @param decision - The ContextDecision to emit along with its subsidiary
161  ///                   structures.
162  /// @param name     - The name for the ContextDecision.
163  void emitContextDecision(raw_ostream &o1,
164                           raw_ostream &o2,
165                           uint32_t &i1,
166                           uint32_t &i2,
167                           ContextDecision &decision,
168                           const char* name) const;
169
170  /// emitInstructionInfo - Prints the instruction specifier table, which has
171  ///   one entry for each instruction, and contains name and operand
172  ///   information.  This table is printed as:
173  ///
174  ///   struct InstructionSpecifier CONTEXTS_SYM[k] = {
175  ///     {
176  ///       /* nnnn */
177  ///       "MNEMONIC",
178  ///       0xnn,
179  ///       {
180  ///         {
181  ///           ENCODING,
182  ///           TYPE
183  ///         },
184  ///         ...
185  ///       }
186  ///     },
187  ///   };
188  ///
189  ///   k is the total number of instructions.
190  ///   nnnn is the ID of the current instruction (0-based).  This table
191  ///   includes entries for non-instructions like PHINODE.
192  ///   0xnn is the lowest possible opcode for the current instruction, used for
193  ///   AddRegFrm instructions to compute the operand's value.
194  ///   ENCODING and TYPE describe the encoding and type for a single operand.
195  ///
196  /// @param o  - The output stream to which the instruction table should be
197  ///             written.
198  /// @param i  - The indent level for use with the stream.
199  void emitInstructionInfo(raw_ostream &o, uint32_t &i) const;
200
201  /// emitContextTable - Prints the table that is used to translate from an
202  ///   instruction attribute mask to an instruction context.  This table is
203  ///   printed as:
204  ///
205  ///   InstructionContext CONTEXTS_STR[256] = {
206  ///     IC, /* 0x00 */
207  ///     ...
208  ///   };
209  ///
210  ///   IC is the context corresponding to the mask 0x00, and there are 256
211  ///   possible masks.
212  ///
213  /// @param o  - The output stream to which the context table should be written.
214  /// @param i  - The indent level for use with the stream.
215  void emitContextTable(raw_ostream &o, uint32_t &i) const;
216
217  /// emitContextDecisions - Prints all four ContextDecision structures using
218  ///   emitContextDecision().
219  ///
220  /// @param o1 - The output stream to print the ID tables generated by
221  ///             emitModRMDecision() to.
222  /// @param o2 - The output stream to print the decision structures to.
223  /// @param i1 - The indent level to use with stream o1.
224  /// @param i2 - The indent level to use with stream o2.
225  void emitContextDecisions(raw_ostream &o1,
226                            raw_ostream &o2,
227                            uint32_t &i1,
228                            uint32_t &i2) const;
229
230  /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a
231  ///   ModRMDecision to refer to a particular instruction ID.
232  ///
233  /// @param decision - The ModRMDecision to populate.
234  /// @param filter   - The filter to use in deciding which entries to populate.
235  /// @param uid      - The unique ID to set matching entries to.
236  /// @param opcode   - The opcode of the instruction, for error reporting.
237  void setTableFields(ModRMDecision &decision,
238                      const ModRMFilter &filter,
239                      InstrUID uid,
240                      uint8_t opcode);
241public:
242  /// Constructor - Allocates space for the class decisions and clears them.
243  DisassemblerTables();
244
245  ~DisassemblerTables();
246
247  /// emit - Emits the instruction table, context table, and class decisions.
248  ///
249  /// @param o  - The output stream to print the tables to.
250  void emit(raw_ostream &o) const;
251
252  /// setTableFields - Uses the opcode type, instruction context, opcode, and a
253  ///   ModRMFilter as criteria to set a particular set of entries in the
254  ///   decode tables to point to a specific uid.
255  ///
256  /// @param type         - The opcode type (ONEBYTE, TWOBYTE, etc.)
257  /// @param insnContext  - The context to use (IC, IC_64BIT, etc.)
258  /// @param opcode       - The last byte of the opcode (not counting any escape
259  ///                       or extended opcodes).
260  /// @param filter       - The ModRMFilter that decides which ModR/M byte values
261  ///                       correspond to the desired instruction.
262  /// @param uid          - The unique ID of the instruction.
263  /// @param is32bit      - Instructon is only 32-bit
264  /// @param ignoresVEX_L - Instruction ignores VEX.L
265  void setTableFields(OpcodeType type,
266                      InstructionContext insnContext,
267                      uint8_t opcode,
268                      const ModRMFilter &filter,
269                      InstrUID uid,
270                      bool is32bit,
271                      bool ignoresVEX_L);
272
273  /// specForUID - Returns the instruction specifier for a given unique
274  ///   instruction ID.  Used when resolving collisions.
275  ///
276  /// @param uid  - The unique ID of the instruction.
277  /// @return     - A reference to the instruction specifier.
278  InstructionSpecifier& specForUID(InstrUID uid) {
279    if (uid >= InstructionSpecifiers.size())
280      InstructionSpecifiers.resize(uid + 1);
281
282    return InstructionSpecifiers[uid];
283  }
284
285  // hasConflicts - Reports whether there were primary decode conflicts
286  //   from any instructions added to the tables.
287  // @return  - true if there were; false otherwise.
288
289  bool hasConflicts() {
290    return HasConflicts;
291  }
292};
293
294} // namespace X86Disassembler
295
296} // namespace llvm
297
298#endif
299