1//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10#define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11
12#include "llvm/ADT/StringRef.h"
13#include "llvm/MC/MCExpr.h"
14#include "llvm/MC/MCParser/MCAsmParserExtension.h"
15#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
16#include "llvm/MC/MCTargetOptions.h"
17#include "llvm/Support/SMLoc.h"
18#include "llvm/TargetParser/SubtargetFeature.h"
19#include <cstdint>
20#include <memory>
21
22namespace llvm {
23
24class MCContext;
25class MCInst;
26class MCInstrInfo;
27class MCRegister;
28class MCStreamer;
29class MCSubtargetInfo;
30class MCSymbol;
31template <typename T> class SmallVectorImpl;
32
33using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
34
/// The kind of rewrite described by an AsmRewrite record.
enum AsmRewriteKind {
  /// Rewrite align as .align.
  AOK_Align,
  /// Rewrite even as .even.
  AOK_EVEN,
  /// Rewrite _emit as .byte.
  AOK_Emit,
  /// Rewrite in terms of ${N:P}.
  AOK_CallInput,
  /// Rewrite in terms of $N.
  AOK_Input,
  /// Rewrite in terms of $N.
  AOK_Output,
  /// Add a sizing directive (e.g., dword ptr).
  AOK_SizeDirective,
  /// Rewrite local labels.
  AOK_Label,
  /// Add EndOfStatement (e.g., "\n\t").
  AOK_EndOfStatement,
  /// Skip emission (e.g., offset/type operators).
  AOK_Skip,
  /// SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
  AOK_IntelExpr
};
48
/// Precedence value for each AsmRewriteKind. There is exactly one entry per
/// enumerator, listed in enumerator order, so the table lines up with the
/// numeric value of the kind. Keep the per-entry labels in sync with the enum
/// when adding or reordering kinds.
// NOTE(review): how the numeric precedences are compared is decided by the
// code that consumes this table, which is not part of this header.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};
62
63// Represent the various parts which make up an intel expression,
64// used for emitting compound intel expressions
65struct IntelExpr {
66  bool NeedBracs = false;
67  int64_t Imm = 0;
68  StringRef BaseReg;
69  StringRef IndexReg;
70  StringRef OffsetName;
71  unsigned Scale = 1;
72
73  IntelExpr() = default;
74  // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
75  IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
76            StringRef offsetName, int64_t imm, bool needBracs)
77      : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
78        OffsetName(offsetName), Scale(1) {
79    if (scale)
80      Scale = scale;
81  }
82  bool hasBaseReg() const { return !BaseReg.empty(); }
83  bool hasIndexReg() const { return !IndexReg.empty(); }
84  bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
85  bool hasOffset() const { return !OffsetName.empty(); }
86  // Normally we won't emit immediates unconditionally,
87  // unless we've got no other components
88  bool emitImm() const { return !(hasRegs() || hasOffset()); }
89  bool isValid() const {
90    return (Scale == 1) ||
91           (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
92  }
93};
94
95struct AsmRewrite {
96  AsmRewriteKind Kind;
97  SMLoc Loc;
98  unsigned Len;
99  bool Done;
100  int64_t Val;
101  StringRef Label;
102  IntelExpr IntelExp;
103  bool IntelExpRestricted;
104
105public:
106  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
107             bool Restricted = false)
108      : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
109    IntelExpRestricted = Restricted;
110  }
111  AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
112    : AsmRewrite(kind, loc, len) { Label = label; }
113  AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
114    : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
115};
116
117struct ParseInstructionInfo {
118  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
119
120  ParseInstructionInfo() = default;
121  ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
122    : AsmRewrites(rewrites) {}
123};
124
/// Three-way result of matching a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success,
  /// Operand did not match.
  MatchOperand_NoMatch,
  /// Operand matched but had errors.
  MatchOperand_ParseFail
};
130
131/// Ternary parse status returned by various parse* methods.
132class ParseStatus {
133  enum class StatusTy { Success, Failure, NoMatch } Status;
134
135public:
136#if __cplusplus >= 202002L
137  using enum StatusTy;
138#else
139  static constexpr StatusTy Success = StatusTy::Success;
140  static constexpr StatusTy Failure = StatusTy::Failure;
141  static constexpr StatusTy NoMatch = StatusTy::NoMatch;
142#endif
143
144  constexpr ParseStatus() : Status(NoMatch) {}
145
146  constexpr ParseStatus(StatusTy Status) : Status(Status) {}
147
148  constexpr ParseStatus(bool Error) : Status(Error ? Failure : Success) {}
149
150  template <typename T> constexpr ParseStatus(T) = delete;
151
152  constexpr bool isSuccess() const { return Status == StatusTy::Success; }
153  constexpr bool isFailure() const { return Status == StatusTy::Failure; }
154  constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }
155
156  // Allow implicit conversions to / from OperandMatchResultTy.
157  LLVM_DEPRECATED("Migrate to ParseStatus", "")
158  constexpr ParseStatus(OperandMatchResultTy R)
159      : Status(R == MatchOperand_Success     ? Success
160               : R == MatchOperand_ParseFail ? Failure
161                                             : NoMatch) {}
162  LLVM_DEPRECATED("Migrate to ParseStatus", "")
163  constexpr operator OperandMatchResultTy() const {
164    return isSuccess()   ? MatchOperand_Success
165           : isFailure() ? MatchOperand_ParseFail
166                         : MatchOperand_NoMatch;
167  }
168};
169
/// Outcome of an operand predicate: a full match, a near match, or no match
/// at all.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};
175
176// When an operand is parsed, the assembler will try to iterate through a set of
177// possible operand classes that the operand might match and call the
178// corresponding PredicateMethod to determine that.
179//
180// If there are two AsmOperands that would give a specific diagnostic if there
181// is no match, there is currently no mechanism to distinguish which operand is
182// a closer match. The DiagnosticPredicate distinguishes between 'completely
183// no match' and 'near match', so the assembler can decide whether to give a
184// specific diagnostic, or use 'InvalidOperand' and continue to find a
185// 'better matching' diagnostic.
186//
187// For example:
//    opcode opnd0, opnd1, opnd2
189//
190// where:
191//    opnd2 could be an 'immediate of range [-8, 7]'
192//    opnd2 could be a  'register + shift/extend'.
193//
194// If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
195// little sense to give a diagnostic that the operand should be an immediate
196// in range [-8, 7].
197//
198// This is a light-weight alternative to the 'NearMissInfo' approach
199// below which collects *all* possible diagnostics. This alternative
200// is optional and fully backward compatible with existing
201// PredicateMethods that return a 'bool' (match or no match).
202struct DiagnosticPredicate {
203  DiagnosticPredicateTy Type;
204
205  explicit DiagnosticPredicate(bool Match)
206      : Type(Match ? DiagnosticPredicateTy::Match
207                   : DiagnosticPredicateTy::NearMatch) {}
208  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
209  DiagnosticPredicate(const DiagnosticPredicate &) = default;
210  DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;
211
212  operator bool() const { return Type == DiagnosticPredicateTy::Match; }
213  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
214  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
215  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
216};
217
218// When matching of an assembly instruction fails, there may be multiple
219// encodings that are close to being a match. It's often ambiguous which one
220// the programmer intended to use, so we want to report an error which mentions
221// each of these "near-miss" encodings. This struct contains information about
222// one such encoding, and why it did not match the parsed instruction.
223class NearMissInfo {
224public:
225  enum NearMissKind {
226    NoNearMiss,
227    NearMissOperand,
228    NearMissFeature,
229    NearMissPredicate,
230    NearMissTooFewOperands,
231  };
232
233  // The encoding is valid for the parsed assembly string. This is only used
234  // internally to the table-generated assembly matcher.
235  static NearMissInfo getSuccess() { return NearMissInfo(); }
236
237  // The instruction encoding is not valid because it requires some target
238  // features that are not currently enabled. MissingFeatures has a bit set for
239  // each feature that the encoding needs but which is not enabled.
240  static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
241    NearMissInfo Result;
242    Result.Kind = NearMissFeature;
243    Result.Features = MissingFeatures;
244    return Result;
245  }
246
247  // The instruction encoding is not valid because the target-specific
248  // predicate function returned an error code. FailureCode is the
249  // target-specific error code returned by the predicate.
250  static NearMissInfo getMissedPredicate(unsigned FailureCode) {
251    NearMissInfo Result;
252    Result.Kind = NearMissPredicate;
253    Result.PredicateError = FailureCode;
254    return Result;
255  }
256
257  // The instruction encoding is not valid because one (and only one) parsed
258  // operand is not of the correct type. OperandError is the error code
259  // relating to the operand class expected by the encoding. OperandClass is
260  // the type of the expected operand. Opcode is the opcode of the encoding.
261  // OperandIndex is the index into the parsed operand list.
262  static NearMissInfo getMissedOperand(unsigned OperandError,
263                                       unsigned OperandClass, unsigned Opcode,
264                                       unsigned OperandIndex) {
265    NearMissInfo Result;
266    Result.Kind = NearMissOperand;
267    Result.MissedOperand.Error = OperandError;
268    Result.MissedOperand.Class = OperandClass;
269    Result.MissedOperand.Opcode = Opcode;
270    Result.MissedOperand.Index = OperandIndex;
271    return Result;
272  }
273
274  // The instruction encoding is not valid because it expects more operands
275  // than were parsed. OperandClass is the class of the expected operand that
276  // was not provided. Opcode is the instruction encoding.
277  static NearMissInfo getTooFewOperands(unsigned OperandClass,
278                                        unsigned Opcode) {
279    NearMissInfo Result;
280    Result.Kind = NearMissTooFewOperands;
281    Result.TooFewOperands.Class = OperandClass;
282    Result.TooFewOperands.Opcode = Opcode;
283    return Result;
284  }
285
286  operator bool() const { return Kind != NoNearMiss; }
287
288  NearMissKind getKind() const { return Kind; }
289
290  // Feature flags required by the instruction, that the current target does
291  // not have.
292  const FeatureBitset& getFeatures() const {
293    assert(Kind == NearMissFeature);
294    return Features;
295  }
296  // Error code returned by the target predicate when validating this
297  // instruction encoding.
298  unsigned getPredicateError() const {
299    assert(Kind == NearMissPredicate);
300    return PredicateError;
301  }
302  // MatchClassKind of the operand that we expected to see.
303  unsigned getOperandClass() const {
304    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
305    return MissedOperand.Class;
306  }
307  // Opcode of the encoding we were trying to match.
308  unsigned getOpcode() const {
309    assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
310    return MissedOperand.Opcode;
311  }
312  // Error code returned when validating the operand.
313  unsigned getOperandError() const {
314    assert(Kind == NearMissOperand);
315    return MissedOperand.Error;
316  }
317  // Index of the actual operand we were trying to match in the list of parsed
318  // operands.
319  unsigned getOperandIndex() const {
320    assert(Kind == NearMissOperand);
321    return MissedOperand.Index;
322  }
323
324private:
325  NearMissKind Kind;
326
327  // These two structs share a common prefix, so we can safely rely on the fact
328  // that they overlap in the union.
329  struct MissedOpInfo {
330    unsigned Class;
331    unsigned Opcode;
332    unsigned Error;
333    unsigned Index;
334  };
335
336  struct TooFewOperandsInfo {
337    unsigned Class;
338    unsigned Opcode;
339  };
340
341  union {
342    FeatureBitset Features;
343    unsigned PredicateError;
344    MissedOpInfo MissedOperand;
345    TooFewOperandsInfo TooFewOperands;
346  };
347
348  NearMissInfo() : Kind(NoNearMiss) {}
349};
350
351/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
352class MCTargetAsmParser : public MCAsmParserExtension {
353public:
354  enum MatchResultTy {
355    Match_InvalidOperand,
356    Match_InvalidTiedOperand,
357    Match_MissingFeature,
358    Match_MnemonicFail,
359    Match_Success,
360    Match_NearMisses,
361    FIRST_TARGET_MATCH_RESULT_TY
362  };
363
364protected: // Can only create subclasses.
365  MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
366                    const MCInstrInfo &MII);
367
368  /// Create a copy of STI and return a non-const reference to it.
369  MCSubtargetInfo &copySTI();
370
371  /// AvailableFeatures - The current set of available features.
372  FeatureBitset AvailableFeatures;
373
374  /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
375  bool ParsingMSInlineAsm = false;
376
377  /// SemaCallback - The Sema callback implementation.  Must be set when parsing
378  /// ms-style inline assembly.
379  MCAsmParserSemaCallback *SemaCallback = nullptr;
380
381  /// Set of options which affects instrumentation of inline assembly.
382  MCTargetOptions MCOptions;
383
384  /// Current STI.
385  const MCSubtargetInfo *STI;
386
387  const MCInstrInfo &MII;
388
389public:
390  MCTargetAsmParser(const MCTargetAsmParser &) = delete;
391  MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
392
393  ~MCTargetAsmParser() override;
394
395  const MCSubtargetInfo &getSTI() const;
396
397  const FeatureBitset& getAvailableFeatures() const {
398    return AvailableFeatures;
399  }
400  void setAvailableFeatures(const FeatureBitset& Value) {
401    AvailableFeatures = Value;
402  }
403
404  bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
405  void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
406
407  MCTargetOptions getTargetOptions() const { return MCOptions; }
408
409  void setSemaCallback(MCAsmParserSemaCallback *Callback) {
410    SemaCallback = Callback;
411  }
412
413  // Target-specific parsing of expression.
414  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
415    return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
416  }
417
418  virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
419                             SMLoc &EndLoc) = 0;
420
421  /// tryParseRegister - parse one register if possible
422  ///
423  /// Check whether a register specification can be parsed at the current
424  /// location, without failing the entire parse if it can't. Must not consume
425  /// tokens if the parse fails.
426  virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
427                                       SMLoc &EndLoc) = 0;
428
429  /// ParseInstruction - Parse one assembly instruction.
430  ///
431  /// The parser is positioned following the instruction name. The target
432  /// specific instruction parser should parse the entire instruction and
433  /// construct the appropriate MCInst, or emit an error. On success, the entire
434  /// line should be parsed up to and including the end-of-statement token. On
435  /// failure, the parser is not required to read to the end of the line.
436  //
437  /// \param Name - The instruction name.
438  /// \param NameLoc - The source location of the name.
439  /// \param Operands [out] - The list of parsed operands, this returns
440  ///        ownership of them to the caller.
441  /// \return True on failure.
442  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
443                                SMLoc NameLoc, OperandVector &Operands) = 0;
444  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
445                                AsmToken Token, OperandVector &Operands) {
446    return ParseInstruction(Info, Name, Token.getLoc(), Operands);
447  }
448
449  /// ParseDirective - Parse a target specific assembler directive
450  /// This method is deprecated, use 'parseDirective' instead.
451  ///
452  /// The parser is positioned following the directive name.  The target
453  /// specific directive parser should parse the entire directive doing or
454  /// recording any target specific work, or return true and do nothing if the
455  /// directive is not target specific. If the directive is specific for
456  /// the target, the entire line is parsed up to and including the
457  /// end-of-statement token and false is returned.
458  ///
459  /// \param DirectiveID - the identifier token of the directive.
460  virtual bool ParseDirective(AsmToken DirectiveID) { return true; }
461
462  /// Parses a target-specific assembler directive.
463  ///
464  /// The parser is positioned following the directive name. The target-specific
465  /// directive parser should parse the entire directive doing or recording any
466  /// target-specific work, or emit an error. On success, the entire line should
467  /// be parsed up to and including the end-of-statement token. On failure, the
468  /// parser is not required to read to the end of the line. If the directive is
469  /// not target-specific, no tokens should be consumed and NoMatch is returned.
470  ///
471  /// \param DirectiveID - The token identifying the directive.
472  virtual ParseStatus parseDirective(AsmToken DirectiveID);
473
474  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
475  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
476  /// This returns false on success and returns true on failure to match.
477  ///
478  /// On failure, the target parser is responsible for emitting a diagnostic
479  /// explaining the match failure.
480  virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
481                                       OperandVector &Operands, MCStreamer &Out,
482                                       uint64_t &ErrorInfo,
483                                       bool MatchingInlineAsm) = 0;
484
485  /// Allows targets to let registers opt out of clobber lists.
486  virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
487
488  /// Allow a target to add special case operand matching for things that
489  /// tblgen doesn't/can't handle effectively. For example, literal
490  /// immediates on ARM. TableGen expects a token operand, but the parser
491  /// will recognize them as immediates.
492  virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
493                                              unsigned Kind) {
494    return Match_InvalidOperand;
495  }
496
497  /// Validate the instruction match against any complex target predicates
498  /// before rendering any operands to it.
499  virtual unsigned
500  checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
501    return Match_Success;
502  }
503
504  /// checkTargetMatchPredicate - Validate the instruction match against
505  /// any complex target predicates not expressible via match classes.
506  virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
507    return Match_Success;
508  }
509
510  virtual void convertToMapAndConstraints(unsigned Kind,
511                                          const OperandVector &Operands) = 0;
512
513  /// Returns whether two operands are registers and are equal. This is used
514  /// by the tied-operands checks in the AsmMatcher. This method can be
515  /// overridden to allow e.g. a sub- or super-register as the tied operand.
516  virtual bool areEqualRegs(const MCParsedAsmOperand &Op1,
517                            const MCParsedAsmOperand &Op2) const {
518    return Op1.isReg() && Op2.isReg() && Op1.getReg() == Op2.getReg();
519  }
520
521  // Return whether this parser uses assignment statements with equals tokens
522  virtual bool equalIsAsmAssignment() { return true; };
523  // Return whether this start of statement identifier is a label
524  virtual bool isLabel(AsmToken &Token) { return true; };
525  // Return whether this parser accept star as start of statement
526  virtual bool starIsStartOfStatement() { return false; };
527
528  virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
529                                            MCSymbolRefExpr::VariantKind,
530                                            MCContext &Ctx) {
531    return nullptr;
532  }
533
534  // For actions that have to be performed before a label is emitted
535  virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {}
536
537  virtual void onLabelParsed(MCSymbol *Symbol) {}
538
539  /// Ensure that all previously parsed instructions have been emitted to the
540  /// output streamer, if the target does not emit them immediately.
541  virtual void flushPendingInstructions(MCStreamer &Out) {}
542
543  virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
544                                              AsmToken::TokenKind OperatorToken,
545                                              MCContext &Ctx) {
546    return nullptr;
547  }
548
549  // For any initialization at the beginning of parsing.
550  virtual void onBeginOfFile() {}
551
552  // For any checks or cleanups at the end of parsing.
553  virtual void onEndOfFile() {}
554};
555
556} // end namespace llvm
557
558#endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
559