1199989Srdivacky//===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===//
2199989Srdivacky//
3199989Srdivacky//                     The LLVM Compiler Infrastructure
4199989Srdivacky//
5199989Srdivacky// This file is distributed under the University of Illinois Open Source
6199989Srdivacky// License. See LICENSE.TXT for details.
7199989Srdivacky//
8199989Srdivacky//===----------------------------------------------------------------------===//
9199989Srdivacky
10199989Srdivacky#include "CodeGenTarget.h"
11201360Srdivacky#include "X86DisassemblerTables.h"
12201360Srdivacky#include "X86RecognizableInstr.h"
13226633Sdim#include "llvm/TableGen/Error.h"
14226633Sdim#include "llvm/TableGen/Record.h"
15239462Sdim#include "llvm/TableGen/TableGenBackend.h"
16206124Srdivacky
17199989Srdivackyusing namespace llvm;
18201360Srdivackyusing namespace llvm::X86Disassembler;
19199989Srdivacky
20201360Srdivacky/// DisassemblerEmitter - Contains disassembler table emitters for various
21201360Srdivacky/// architectures.
22201360Srdivacky
23201360Srdivacky/// X86 Disassembler Emitter
24201360Srdivacky///
25201360Srdivacky/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
26201360Srdivacky///     THE END OF THIS COMMENT!
27201360Srdivacky///
28201360Srdivacky/// The X86 disassembler emitter is part of the X86 Disassembler, which is
29201360Srdivacky/// documented in lib/Target/X86/X86Disassembler.h.
30201360Srdivacky///
31201360Srdivacky/// The emitter produces the tables that the disassembler uses to translate
32201360Srdivacky/// instructions.  The emitter generates the following tables:
33201360Srdivacky///
34201360Srdivacky/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
35201360Srdivacky///   instruction contexts.  Although for each attribute there are cases where
36201360Srdivacky///   that attribute determines decoding, in the majority of cases decoding is
37201360Srdivacky///   the same whether or not an attribute is present.  For example, a 64-bit
38201360Srdivacky///   instruction with an OPSIZE prefix and an XS prefix decodes the same way in
39201360Srdivacky///   all cases as a 64-bit instruction with only OPSIZE set.  (The XS prefix
40201360Srdivacky///   may have effects on its execution, but does not change the instruction
41201360Srdivacky///   returned.)  This allows considerable space savings in other tables.
42221345Sdim/// - Six tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
43221345Sdim///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM) contain the hierarchy that the
44221345Sdim///   decoder traverses while decoding an instruction.  At the lowest level of
45221345Sdim///   this hierarchy are instruction UIDs, 16-bit integers that can be used to
46221345Sdim///   uniquely identify the instruction and correspond exactly to its position
47221345Sdim///   in the list of CodeGenInstructions for the target.
48201360Srdivacky/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
49201360Srdivacky///   each instruction and how to decode them.
50201360Srdivacky///
51201360Srdivacky/// During table generation, there may be conflicts between instructions that
52201360Srdivacky/// occupy the same space in the decode tables.  These conflicts are resolved as
53201360Srdivacky/// follows in setTableFields() (X86DisassemblerTables.cpp)
54201360Srdivacky///
55201360Srdivacky/// - If the current context is the native context for one of the instructions
56201360Srdivacky///   (that is, the attributes specified for it in the LLVM tables specify
57201360Srdivacky///   precisely the current context), then it has priority.
58201360Srdivacky/// - If the current context isn't native for either of the instructions, then
59201360Srdivacky///   the higher-priority context wins (that is, the one that is more specific).
60201360Srdivacky///   That hierarchy is determined by outranks() (X86DisassemblerTables.cpp)
61201360Srdivacky/// - If the current context is native for both instructions, then the table
62201360Srdivacky///   emitter reports a conflict and dies.
63201360Srdivacky///
64201360Srdivacky/// *** RESOLUTION FOR "Primary decode conflict"S
65201360Srdivacky///
66201360Srdivacky/// If two instructions collide, typically the solution is (in order of
67201360Srdivacky/// likelihood):
68201360Srdivacky///
69201360Srdivacky/// (1) to filter out one of the instructions by editing filter()
70201360Srdivacky///     (X86RecognizableInstr.cpp).  This is the most common resolution, but
71201360Srdivacky///     check the Intel manuals first to make sure that (2) and (3) are not the
72201360Srdivacky///     problem.
73201360Srdivacky/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
74201360Srdivacky///     accurate.  Sometimes they are not.
75201360Srdivacky/// (3) to fix the tables to reflect the actual context (for example, required
76201360Srdivacky///     prefixes), and possibly to add a new context by editing
77201360Srdivacky///     lib/Target/X86/X86DisassemblerDecoderCommon.h.  This is unlikely to be
78201360Srdivacky///     the cause.
79201360Srdivacky///
80201360Srdivacky/// DisassemblerEmitter.cpp contains the implementation for the emitter,
81201360Srdivacky///   which simply pulls out instructions from the CodeGenTarget and pushes them
82201360Srdivacky///   into X86DisassemblerTables.
83201360Srdivacky/// X86DisassemblerTables.h contains the interface for the instruction tables,
84201360Srdivacky///   which manage and emit the structures discussed above.
85201360Srdivacky/// X86DisassemblerTables.cpp contains the implementation for the instruction
86201360Srdivacky///   tables.
87201360Srdivacky/// X86ModRMFilters.h contains filters that can be used to determine which
88201360Srdivacky///   ModR/M values are valid for a particular instruction.  These are used to
89201360Srdivacky///   populate ModRMDecisions.
90201360Srdivacky/// X86RecognizableInstr.h contains the interface for a single instruction,
91201360Srdivacky///   which knows how to translate itself from a CodeGenInstruction and provide
92201360Srdivacky///   the information necessary for integration into the tables.
93201360Srdivacky/// X86RecognizableInstr.cpp contains the implementation for a single
94201360Srdivacky///   instruction.
95201360Srdivacky
96239462Sdimnamespace llvm {
97239462Sdim
98239462Sdimextern void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
99239462Sdim                                std::string PredicateNamespace,
100239462Sdim                                std::string GPrefix,
101239462Sdim                                std::string GPostfix,
102239462Sdim                                std::string ROK,
103239462Sdim                                std::string RFail,
104239462Sdim                                std::string L);
105239462Sdim
106239462Sdimvoid EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
107218893Sdim  CodeGenTarget Target(Records);
108239462Sdim  emitSourceFileHeader(" * " + Target.getName() + " Disassembler", OS);
109199989Srdivacky
110201360Srdivacky  // X86 uses a custom disassembler.
111201360Srdivacky  if (Target.getName() == "X86") {
112201360Srdivacky    DisassemblerTables Tables;
113239462Sdim
114205407Srdivacky    const std::vector<const CodeGenInstruction*> &numberedInstructions =
115205407Srdivacky      Target.getInstructionsByEnumValue();
116239462Sdim
117201360Srdivacky    for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
118201360Srdivacky      RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
119201360Srdivacky
120201360Srdivacky    if (Tables.hasConflicts())
121243830Sdim      PrintFatalError(Target.getTargetRecord()->getLoc(),
122243830Sdim                      "Primary decode conflict");
123201360Srdivacky
124201360Srdivacky    Tables.emit(OS);
125201360Srdivacky    return;
126201360Srdivacky  }
127201360Srdivacky
128226633Sdim  // ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
129226633Sdim  if (Target.getName() == "ARM" ||
130249423Sdim      Target.getName() == "Thumb" ||
131249423Sdim      Target.getName() == "AArch64") {
132249423Sdim    EmitFixedLenDecoder(Records, OS, Target.getName() == "AArch64" ? "AArch64" : "ARM",
133239462Sdim                        "if (!Check(S, ", ")) return MCDisassembler::Fail;",
134239462Sdim                        "S", "MCDisassembler::Fail",
135239462Sdim                        "  MCDisassembler::DecodeStatus S = "
136239462Sdim                          "MCDisassembler::Success;\n(void)S;");
137206124Srdivacky    return;
138226633Sdim  }
139206124Srdivacky
140239462Sdim  EmitFixedLenDecoder(Records, OS, Target.getName(),
141239462Sdim                      "if (", " == MCDisassembler::Fail)"
142239462Sdim                       " return MCDisassembler::Fail;",
143239462Sdim                      "MCDisassembler::Success", "MCDisassembler::Fail", "");
144199989Srdivacky}
145239462Sdim
146239462Sdim} // End llvm namespace
147