MCTargetAsmParser.h revision 360784
1//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/MC/MCExpr.h"
14#include "llvm/MC/MCInstrInfo.h"
15#include "llvm/MC/MCParser/MCAsmLexer.h"
16#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17#include "llvm/MC/MCParser/MCAsmParserExtension.h"
18#include "llvm/MC/MCTargetOptions.h"
19#include "llvm/MC/SubtargetFeature.h"
20#include "llvm/Support/SMLoc.h"
21#include <cstdint>
22#include <memory>
23
24namespace llvm {
25
26class MCInst;
27class MCParsedAsmOperand;
28class MCStreamer;
29class MCSubtargetInfo;
30template <typename T> class SmallVectorImpl;
31
32using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
33
/// The kind of rewrite that an AsmRewrite entry describes.
enum AsmRewriteKind {
  /// Rewrite align as .align.
  AOK_Align,
  /// Rewrite even as .even.
  AOK_EVEN,
  /// Rewrite _emit as .byte.
  AOK_Emit,
  /// Rewrite in terms of ${N:P}.
  AOK_CallInput,
  /// Rewrite in terms of $N.
  AOK_Input,
  /// Rewrite in terms of $N.
  AOK_Output,
  /// Add a sizing directive (e.g., dword ptr).
  AOK_SizeDirective,
  /// Rewrite local labels.
  AOK_Label,
  /// Add EndOfStatement (e.g., "\n\t").
  AOK_EndOfStatement,
  /// Skip emission (e.g., offset/type operators).
  AOK_Skip,
  /// SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
  AOK_IntelExpr
};
47
/// Precedence value associated with each AsmRewriteKind. There is one entry
/// per enumerator, listed in the enum's declaration order (see the label on
/// each row), so any enumerator added to AsmRewriteKind needs a matching row
/// here at the same position.
const char AsmRewritePrecedence[] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};
61
62// Represnt the various parts which makes up an intel expression,
63// used for emitting compound intel expressions
64struct IntelExpr {
65  bool NeedBracs;
66  int64_t Imm;
67  StringRef BaseReg;
68  StringRef IndexReg;
69  StringRef OffsetName;
70  unsigned Scale;
71
72  IntelExpr()
73      : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
74        OffsetName(StringRef()), Scale(1) {}
75  // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
76  IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
77            StringRef offsetName, int64_t imm, bool needBracs)
78      : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
79        OffsetName(offsetName), Scale(1) {
80    if (scale)
81      Scale = scale;
82  }
83  bool hasBaseReg() const { return !BaseReg.empty(); }
84  bool hasIndexReg() const { return !IndexReg.empty(); }
85  bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
86  bool hasOffset() const { return !OffsetName.empty(); }
87  // Normally we won't emit immediates unconditionally,
88  // unless we've got no other components
89  bool emitImm() const { return !(hasRegs() || hasOffset()); }
90  bool isValid() const {
91    return (Scale == 1) ||
92           (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
93  }
94};
95
96struct AsmRewrite {
97  AsmRewriteKind Kind;
98  SMLoc Loc;
99  unsigned Len;
100  bool Done;
101  int64_t Val;
102  StringRef Label;
103  IntelExpr IntelExp;
104
105public:
106  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
107    : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {}
108  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
109    : AsmRewrite(kind, loc, len) { Label = label; }
110  AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
111    : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
112};
113
114struct ParseInstructionInfo {
115  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
116
117  ParseInstructionInfo() = default;
118  ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
119    : AsmRewrites(rewrites) {}
120};
121
/// Result of attempting to match a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success,
  /// Operand did not match.
  MatchOperand_NoMatch,
  /// Operand matched but had errors.
  MatchOperand_ParseFail
};
127
/// The three outcomes a DiagnosticPredicate can report.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

/// When an operand is parsed, the assembler will try to iterate through a set
/// of possible operand classes that the operand might match and call the
/// corresponding PredicateMethod to determine that.
///
/// If there are two AsmOperands that would give a specific diagnostic if
/// there is no match, there is currently no mechanism to distinguish which
/// operand is a closer match. The DiagnosticPredicate distinguishes between
/// 'completely no match' and 'near match', so the assembler can decide
/// whether to give a specific diagnostic, or use 'InvalidOperand' and
/// continue to find a 'better matching' diagnostic.
///
/// For example:
///    opcode opnd0, opnd1, opnd2
///
/// where:
///    opnd2 could be an 'immediate of range [-8, 7]'
///    opnd2 could be a  'register + shift/extend'.
///
/// If opnd2 is a valid register, but with a wrong shift/extend suffix, it
/// makes little sense to give a diagnostic that the operand should be an
/// immediate in range [-8, 7].
///
/// This is a light-weight alternative to the 'NearMissInfo' approach
/// below which collects *all* possible diagnostics. This alternative
/// is optional and fully backward compatible with existing
/// PredicateMethods that return a 'bool' (match or no match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  /// Conversion from a plain bool result: true becomes Match, false becomes
  /// NearMatch (never NoMatch).
  explicit DiagnosticPredicate(bool Match)
      : DiagnosticPredicate(Match ? DiagnosticPredicateTy::Match
                                  : DiagnosticPredicateTy::NearMatch) {}
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  DiagnosticPredicate(const DiagnosticPredicate &) = default;
  DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;

  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }

  /// True only for Match; NearMatch and NoMatch both convert to false.
  operator bool() const { return isMatch(); }
};
175
176// When matching of an assembly instruction fails, there may be multiple
177// encodings that are close to being a match. It's often ambiguous which one
178// the programmer intended to use, so we want to report an error which mentions
179// each of these "near-miss" encodings. This struct contains information about
180// one such encoding, and why it did not match the parsed instruction.
181class NearMissInfo {
182public:
183  enum NearMissKind {
184    NoNearMiss,
185    NearMissOperand,
186    NearMissFeature,
187    NearMissPredicate,
188    NearMissTooFewOperands,
189  };
190
191  // The encoding is valid for the parsed assembly string. This is only used
192  // internally to the table-generated assembly matcher.
193  static NearMissInfo getSuccess() { return NearMissInfo(); }
194
195  // The instruction encoding is not valid because it requires some target
196  // features that are not currently enabled. MissingFeatures has a bit set for
197  // each feature that the encoding needs but which is not enabled.
198  static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
199    NearMissInfo Result;
200    Result.Kind = NearMissFeature;
201    Result.Features = MissingFeatures;
202    return Result;
203  }
204
205  // The instruction encoding is not valid because the target-specific
206  // predicate function returned an error code. FailureCode is the
207  // target-specific error code returned by the predicate.
208  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
209    NearMissInfo Result;
210    Result.Kind = NearMissPredicate;
211    Result.PredicateError = FailureCode;
212    return Result;
213  }
214
215  // The instruction encoding is not valid because one (and only one) parsed
216  // operand is not of the correct type. OperandError is the error code
217  // relating to the operand class expected by the encoding. OperandClass is
218  // the type of the expected operand. Opcode is the opcode of the encoding.
219  // OperandIndex is the index into the parsed operand list.
220  static NearMissInfo getMissedOperand(unsigned OperandError,
221                                       unsigned OperandClass, unsigned Opcode,
222                                       unsigned OperandIndex) {
223    NearMissInfo Result;
224    Result.Kind = NearMissOperand;
225    Result.MissedOperand.Error = OperandError;
226    Result.MissedOperand.Class = OperandClass;
227    Result.MissedOperand.Opcode = Opcode;
228    Result.MissedOperand.Index = OperandIndex;
229    return Result;
230  }
231
232  // The instruction encoding is not valid because it expects more operands
233  // than were parsed. OperandClass is the class of the expected operand that
234  // was not provided. Opcode is the instruction encoding.
235  static NearMissInfo getTooFewOperands(unsigned OperandClass,
236                                        unsigned Opcode) {
237    NearMissInfo Result;
238    Result.Kind = NearMissTooFewOperands;
239    Result.TooFewOperands.Class = OperandClass;
240    Result.TooFewOperands.Opcode = Opcode;
241    return Result;
242  }
243
244  operator bool() const { return Kind != NoNearMiss; }
245
246  NearMissKind getKind() const { return Kind; }
247
248  // Feature flags required by the instruction, that the current target does
249  // not have.
250  const FeatureBitset& getFeatures() const {
251    assert(Kind == NearMissFeature);
252    return Features;
253  }
254  // Error code returned by the target predicate when validating this
255  // instruction encoding.
256  unsigned getPredicateError() const {
257    assert(Kind == NearMissPredicate);
258    return PredicateError;
259  }
260  // MatchClassKind of the operand that we expected to see.
261  unsigned getOperandClass() const {
262    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
263    return MissedOperand.Class;
264  }
265  // Opcode of the encoding we were trying to match.
266  unsigned getOpcode() const {
267    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
268    return MissedOperand.Opcode;
269  }
270  // Error code returned when validating the operand.
271  unsigned getOperandError() const {
272    assert(Kind == NearMissOperand);
273    return MissedOperand.Error;
274  }
275  // Index of the actual operand we were trying to match in the list of parsed
276  // operands.
277  unsigned getOperandIndex() const {
278    assert(Kind == NearMissOperand);
279    return MissedOperand.Index;
280  }
281
282private:
283  NearMissKind Kind;
284
285  // These two structs share a common prefix, so we can safely rely on the fact
286  // that they overlap in the union.
287  struct MissedOpInfo {
288    unsigned Class;
289    unsigned Opcode;
290    unsigned Error;
291    unsigned Index;
292  };
293
294  struct TooFewOperandsInfo {
295    unsigned Class;
296    unsigned Opcode;
297  };
298
299  union {
300    FeatureBitset Features;
301    unsigned PredicateError;
302    MissedOpInfo MissedOperand;
303    TooFewOperandsInfo TooFewOperands;
304  };
305
306  NearMissInfo() : Kind(NoNearMiss) {}
307};
308
309/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
310class MCTargetAsmParser : public MCAsmParserExtension {
311public:
312  enum MatchResultTy {
313    Match_InvalidOperand,
314    Match_InvalidTiedOperand,
315    Match_MissingFeature,
316    Match_MnemonicFail,
317    Match_Success,
318    Match_NearMisses,
319    FIRST_TARGET_MATCH_RESULT_TY
320  };
321
322protected: // Can only create subclasses.
323  MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
324                    const MCInstrInfo &MII);
325
326  /// Create a copy of STI and return a non-const reference to it.
327  MCSubtargetInfo &copySTI();
328
329  /// AvailableFeatures - The current set of available features.
330  FeatureBitset AvailableFeatures;
331
332  /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
333  bool ParsingInlineAsm = false;
334
335  /// SemaCallback - The Sema callback implementation.  Must be set when parsing
336  /// ms-style inline assembly.
337  MCAsmParserSemaCallback *SemaCallback;
338
339  /// Set of options which affects instrumentation of inline assembly.
340  MCTargetOptions MCOptions;
341
342  /// Current STI.
343  const MCSubtargetInfo *STI;
344
345  const MCInstrInfo &MII;
346
347public:
348  MCTargetAsmParser(const MCTargetAsmParser &) = delete;
349  MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
350
351  ~MCTargetAsmParser() override;
352
353  const MCSubtargetInfo &getSTI() const;
354
355  const FeatureBitset& getAvailableFeatures() const {
356    return AvailableFeatures;
357  }
358  void setAvailableFeatures(const FeatureBitset& Value) {
359    AvailableFeatures = Value;
360  }
361
362  bool isParsingInlineAsm () { return ParsingInlineAsm; }
363  void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
364
365  MCTargetOptions getTargetOptions() const { return MCOptions; }
366
367  void setSemaCallback(MCAsmParserSemaCallback *Callback) {
368    SemaCallback = Callback;
369  }
370
371  // Target-specific parsing of expression.
372  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
373    return getParser().parsePrimaryExpr(Res, EndLoc);
374  }
375
376  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
377                             SMLoc &EndLoc) = 0;
378
379  /// ParseInstruction - Parse one assembly instruction.
380  ///
381  /// The parser is positioned following the instruction name. The target
382  /// specific instruction parser should parse the entire instruction and
383  /// construct the appropriate MCInst, or emit an error. On success, the entire
384  /// line should be parsed up to and including the end-of-statement token. On
385  /// failure, the parser is not required to read to the end of the line.
386  //
387  /// \param Name - The instruction name.
388  /// \param NameLoc - The source location of the name.
389  /// \param Operands [out] - The list of parsed operands, this returns
390  ///        ownership of them to the caller.
391  /// \return True on failure.
392  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
393                                SMLoc NameLoc, OperandVector &Operands) = 0;
394  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
395                                AsmToken Token, OperandVector &Operands) {
396    return ParseInstruction(Info, Name, Token.getLoc(), Operands);
397  }
398
399  /// ParseDirective - Parse a target specific assembler directive
400  ///
401  /// The parser is positioned following the directive name.  The target
402  /// specific directive parser should parse the entire directive doing or
403  /// recording any target specific work, or return true and do nothing if the
404  /// directive is not target specific. If the directive is specific for
405  /// the target, the entire line is parsed up to and including the
406  /// end-of-statement token and false is returned.
407  ///
408  /// \param DirectiveID - the identifier token of the directive.
409  virtual bool ParseDirective(AsmToken DirectiveID) = 0;
410
411  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
412  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
413  /// This returns false on success and returns true on failure to match.
414  ///
415  /// On failure, the target parser is responsible for emitting a diagnostic
416  /// explaining the match failure.
417  virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
418                                       OperandVector &Operands, MCStreamer &Out,
419                                       uint64_t &ErrorInfo,
420                                       bool MatchingInlineAsm) = 0;
421
422  /// Allows targets to let registers opt out of clobber lists.
423  virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
424
425  /// Allow a target to add special case operand matching for things that
426  /// tblgen doesn't/can't handle effectively. For example, literal
427  /// immediates on ARM. TableGen expects a token operand, but the parser
428  /// will recognize them as immediates.
429  virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
430                                              unsigned Kind) {
431    return Match_InvalidOperand;
432  }
433
434  /// Validate the instruction match against any complex target predicates
435  /// before rendering any operands to it.
436  virtual unsigned
437  checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
438    return Match_Success;
439  }
440
441  /// checkTargetMatchPredicate - Validate the instruction match against
442  /// any complex target predicates not expressible via match classes.
443  virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
444    return Match_Success;
445  }
446
447  virtual void convertToMapAndConstraints(unsigned Kind,
448                                          const OperandVector &Operands) = 0;
449
450  /// Returns whether two registers are equal and is used by the tied-operands
451  /// checks in the AsmMatcher. This method can be overridden allow e.g. a
452  /// sub- or super-register as the tied operand.
453  virtual bool regsEqual(const MCParsedAsmOperand &Op1,
454                         const MCParsedAsmOperand &Op2) const {
455    assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
456    return Op1.getReg() == Op2.getReg();
457  }
458
459  // Return whether this parser uses assignment statements with equals tokens
460  virtual bool equalIsAsmAssignment() { return true; };
461  // Return whether this start of statement identifier is a label
462  virtual bool isLabel(AsmToken &Token) { return true; };
463  // Return whether this parser accept star as start of statement
464  virtual bool starIsStartOfStatement() { return false; };
465
466  virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
467                                            MCSymbolRefExpr::VariantKind,
468                                            MCContext &Ctx) {
469    return nullptr;
470  }
471
472  // For actions that have to be performed before a label is emitted
473  virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
474
475  virtual void onLabelParsed(MCSymbol *Symbol) {}
476
477  /// Ensure that all previously parsed instructions have been emitted to the
478  /// output streamer, if the target does not emit them immediately.
479  virtual void flushPendingInstructions(MCStreamer &Out) {}
480
481  virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
482                                              AsmToken::TokenKind OperatorToken,
483                                              MCContext &Ctx) {
484    return nullptr;
485  }
486
487  // For any checks or cleanups at the end of parsing.
488  virtual void onEndOfFile() {}
489};
490
491} // end namespace llvm
492
493#endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
494