1//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the function that lexes the machine instruction source
10// string.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16
17#include "llvm/ADT/APSInt.h"
18#include "llvm/ADT/StringRef.h"
19#include <string>
20
21namespace llvm {
22
23class Twine;
24
25/// A token produced by the machine instruction lexer.
26struct MIToken {
27  enum TokenKind {
28    // Markers
29    Eof,
30    Error,
31    Newline,
32
33    // Tokens with no info.
34    comma,
35    equal,
36    underscore,
37    colon,
38    coloncolon,
39    dot,
40    exclaim,
41    lparen,
42    rparen,
43    lbrace,
44    rbrace,
45    plus,
46    minus,
47    less,
48    greater,
49
50    // Keywords
51    kw_implicit,
52    kw_implicit_define,
53    kw_def,
54    kw_dead,
55    kw_dereferenceable,
56    kw_killed,
57    kw_undef,
58    kw_internal,
59    kw_early_clobber,
60    kw_debug_use,
61    kw_renamable,
62    kw_tied_def,
63    kw_frame_setup,
64    kw_frame_destroy,
65    kw_nnan,
66    kw_ninf,
67    kw_nsz,
68    kw_arcp,
69    kw_contract,
70    kw_afn,
71    kw_reassoc,
72    kw_nuw,
73    kw_nsw,
74    kw_exact,
75    kw_nofpexcept,
76    kw_debug_location,
77    kw_debug_instr_number,
78    kw_dbg_instr_ref,
79    kw_cfi_same_value,
80    kw_cfi_offset,
81    kw_cfi_rel_offset,
82    kw_cfi_def_cfa_register,
83    kw_cfi_def_cfa_offset,
84    kw_cfi_adjust_cfa_offset,
85    kw_cfi_escape,
86    kw_cfi_def_cfa,
87    kw_cfi_llvm_def_aspace_cfa,
88    kw_cfi_register,
89    kw_cfi_remember_state,
90    kw_cfi_restore,
91    kw_cfi_restore_state,
92    kw_cfi_undefined,
93    kw_cfi_window_save,
94    kw_cfi_aarch64_negate_ra_sign_state,
95    kw_blockaddress,
96    kw_intrinsic,
97    kw_target_index,
98    kw_half,
99    kw_float,
100    kw_double,
101    kw_x86_fp80,
102    kw_fp128,
103    kw_ppc_fp128,
104    kw_target_flags,
105    kw_volatile,
106    kw_non_temporal,
107    kw_invariant,
108    kw_align,
109    kw_basealign,
110    kw_addrspace,
111    kw_stack,
112    kw_got,
113    kw_jump_table,
114    kw_constant_pool,
115    kw_call_entry,
116    kw_custom,
117    kw_liveout,
118    kw_landing_pad,
119    kw_inlineasm_br_indirect_target,
120    kw_ehfunclet_entry,
121    kw_liveins,
122    kw_successors,
123    kw_floatpred,
124    kw_intpred,
125    kw_shufflemask,
126    kw_pre_instr_symbol,
127    kw_post_instr_symbol,
128    kw_heap_alloc_marker,
129    kw_pcsections,
130    kw_cfi_type,
131    kw_bbsections,
132    kw_bb_id,
133    kw_unknown_size,
134    kw_unknown_address,
135    kw_ir_block_address_taken,
136    kw_machine_block_address_taken,
137
138    // Metadata types.
139    kw_distinct,
140
141    // Named metadata keywords
142    md_tbaa,
143    md_alias_scope,
144    md_noalias,
145    md_range,
146    md_diexpr,
147    md_dilocation,
148
149    // Identifier tokens
150    Identifier,
151    NamedRegister,
152    NamedVirtualRegister,
153    MachineBasicBlockLabel,
154    MachineBasicBlock,
155    StackObject,
156    FixedStackObject,
157    NamedGlobalValue,
158    GlobalValue,
159    ExternalSymbol,
160    MCSymbol,
161
162    // Other tokens
163    IntegerLiteral,
164    FloatingPointLiteral,
165    HexLiteral,
166    VectorLiteral,
167    VirtualRegister,
168    ConstantPoolItem,
169    JumpTableIndex,
170    NamedIRBlock,
171    IRBlock,
172    NamedIRValue,
173    IRValue,
174    QuotedIRValue, // `<constant value>`
175    SubRegisterIndex,
176    StringConstant
177  };
178
179private:
180  TokenKind Kind = Error;
181  StringRef Range;
182  StringRef StringValue;
183  std::string StringValueStorage;
184  APSInt IntVal;
185
186public:
187  MIToken() = default;
188
189  MIToken &reset(TokenKind Kind, StringRef Range);
190
191  MIToken &setStringValue(StringRef StrVal);
192  MIToken &setOwnedStringValue(std::string StrVal);
193  MIToken &setIntegerValue(APSInt IntVal);
194
195  TokenKind kind() const { return Kind; }
196
197  bool isError() const { return Kind == Error; }
198
199  bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
200
201  bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
202
203  bool isRegister() const {
204    return Kind == NamedRegister || Kind == underscore ||
205           Kind == NamedVirtualRegister || Kind == VirtualRegister;
206  }
207
208  bool isRegisterFlag() const {
209    return Kind == kw_implicit || Kind == kw_implicit_define ||
210           Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
211           Kind == kw_undef || Kind == kw_internal ||
212           Kind == kw_early_clobber || Kind == kw_debug_use ||
213           Kind == kw_renamable;
214  }
215
216  bool isMemoryOperandFlag() const {
217    return Kind == kw_volatile || Kind == kw_non_temporal ||
218           Kind == kw_dereferenceable || Kind == kw_invariant ||
219           Kind == StringConstant;
220  }
221
222  bool is(TokenKind K) const { return Kind == K; }
223
224  bool isNot(TokenKind K) const { return Kind != K; }
225
226  StringRef::iterator location() const { return Range.begin(); }
227
228  StringRef range() const { return Range; }
229
230  /// Return the token's string value.
231  StringRef stringValue() const { return StringValue; }
232
233  const APSInt &integerValue() const { return IntVal; }
234
235  bool hasIntegerValue() const {
236    return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
237           Kind == MachineBasicBlockLabel || Kind == StackObject ||
238           Kind == FixedStackObject || Kind == GlobalValue ||
239           Kind == VirtualRegister || Kind == ConstantPoolItem ||
240           Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
241  }
242};
243
/// Consume a single machine instruction token in the given source and return
/// the remaining source string.
///
/// \param Source the source string to lex a token from.
/// \param Token taken by non-const reference; presumably filled in with the
///        token that was lexed -- confirm against the definition.
/// \param ErrorCallback callable accepting a StringRef::iterator and a Twine.
///        NOTE(review): presumably invoked with the error location and the
///        diagnostic message when lexing fails -- confirm against the
///        definition.
/// \returns the remaining source string.
StringRef lexMIToken(
    StringRef Source, MIToken &Token,
    function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
249
250} // end namespace llvm
251
252#endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
253