1202878Srdivacky//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2202878Srdivacky//
3202878Srdivacky//                     The LLVM Compiler Infrastructure
4202878Srdivacky//
5202878Srdivacky// This file is distributed under the University of Illinois Open Source
6202878Srdivacky// License. See LICENSE.TXT for details.
7202878Srdivacky//
8202878Srdivacky//===----------------------------------------------------------------------===//
9202878Srdivacky
10249423Sdim#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11249423Sdim#define LLVM_MC_MCPARSER_MCASMLEXER_H
12202878Srdivacky
13202878Srdivacky#include "llvm/ADT/StringRef.h"
14243830Sdim#include "llvm/Support/Compiler.h"
15218893Sdim#include "llvm/Support/DataTypes.h"
16202878Srdivacky#include "llvm/Support/SMLoc.h"
17202878Srdivacky
18202878Srdivackynamespace llvm {
19202878Srdivacky
20202878Srdivacky/// AsmToken - Target independent representation for an assembler token.
21202878Srdivackyclass AsmToken {
22202878Srdivackypublic:
23202878Srdivacky  enum TokenKind {
24202878Srdivacky    // Markers
25202878Srdivacky    Eof, Error,
26202878Srdivacky
27202878Srdivacky    // String values.
28202878Srdivacky    Identifier,
29202878Srdivacky    String,
30218893Sdim
31202878Srdivacky    // Integer values.
32202878Srdivacky    Integer,
33218893Sdim
34218893Sdim    // Real values.
35218893Sdim    Real,
36218893Sdim
37202878Srdivacky    // No-value.
38202878Srdivacky    EndOfStatement,
39202878Srdivacky    Colon,
40243830Sdim    Space,
41202878Srdivacky    Plus, Minus, Tilde,
42202878Srdivacky    Slash,    // '/'
43223017Sdim    BackSlash, // '\'
44202878Srdivacky    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
45207618Srdivacky    Star, Dot, Comma, Dollar, Equal, EqualEqual,
46218893Sdim
47218893Sdim    Pipe, PipePipe, Caret,
48202878Srdivacky    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
49202878Srdivacky    Less, LessEqual, LessLess, LessGreater,
50208599Srdivacky    Greater, GreaterEqual, GreaterGreater, At
51202878Srdivacky  };
52202878Srdivacky
53234982Sdimprivate:
54202878Srdivacky  TokenKind Kind;
55202878Srdivacky
56202878Srdivacky  /// A reference to the entire token contents; this is always a pointer into
57202878Srdivacky  /// a memory buffer owned by the source manager.
58202878Srdivacky  StringRef Str;
59202878Srdivacky
60202878Srdivacky  int64_t IntVal;
61202878Srdivacky
62202878Srdivackypublic:
63202878Srdivacky  AsmToken() {}
64202878Srdivacky  AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
65202878Srdivacky    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
66202878Srdivacky
67202878Srdivacky  TokenKind getKind() const { return Kind; }
68202878Srdivacky  bool is(TokenKind K) const { return Kind == K; }
69202878Srdivacky  bool isNot(TokenKind K) const { return Kind != K; }
70202878Srdivacky
71202878Srdivacky  SMLoc getLoc() const;
72234353Sdim  SMLoc getEndLoc() const;
73202878Srdivacky
74202878Srdivacky  /// getStringContents - Get the contents of a string token (without quotes).
75218893Sdim  StringRef getStringContents() const {
76202878Srdivacky    assert(Kind == String && "This token isn't a string!");
77202878Srdivacky    return Str.slice(1, Str.size() - 1);
78202878Srdivacky  }
79202878Srdivacky
80202878Srdivacky  /// getIdentifier - Get the identifier string for the current token, which
81202878Srdivacky  /// should be an identifier or a string. This gets the portion of the string
82202878Srdivacky  /// which should be used as the identifier, e.g., it does not include the
83202878Srdivacky  /// quotes on strings.
84202878Srdivacky  StringRef getIdentifier() const {
85202878Srdivacky    if (Kind == Identifier)
86202878Srdivacky      return getString();
87202878Srdivacky    return getStringContents();
88202878Srdivacky  }
89202878Srdivacky
90202878Srdivacky  /// getString - Get the string for the current token, this includes all
91202878Srdivacky  /// characters (for example, the quotes on strings) in the token.
92202878Srdivacky  ///
93202878Srdivacky  /// The returned StringRef points into the source manager's memory buffer, and
94202878Srdivacky  /// is safe to store across calls to Lex().
95202878Srdivacky  StringRef getString() const { return Str; }
96202878Srdivacky
97202878Srdivacky  // FIXME: Don't compute this in advance, it makes every token larger, and is
98202878Srdivacky  // also not generally what we want (it is nicer for recovery etc. to lex 123br
99202878Srdivacky  // as a single token, then diagnose as an invalid number).
100218893Sdim  int64_t getIntVal() const {
101202878Srdivacky    assert(Kind == Integer && "This token isn't an integer!");
102218893Sdim    return IntVal;
103202878Srdivacky  }
104202878Srdivacky};
105202878Srdivacky
106202878Srdivacky/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
107202878Srdivacky/// assembly lexers.
108202878Srdivackyclass MCAsmLexer {
109202878Srdivacky  /// The current token, stored in the base class for faster access.
110202878Srdivacky  AsmToken CurTok;
111218893Sdim
112202878Srdivacky  /// The location and description of the current error
113202878Srdivacky  SMLoc ErrLoc;
114202878Srdivacky  std::string Err;
115202878Srdivacky
116243830Sdim  MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
117243830Sdim  void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
118202878Srdivackyprotected: // Can only create subclasses.
119210299Sed  const char *TokStart;
120243830Sdim  bool SkipSpace;
121210299Sed
122202878Srdivacky  MCAsmLexer();
123202878Srdivacky
124202878Srdivacky  virtual AsmToken LexToken() = 0;
125218893Sdim
126202878Srdivacky  void SetError(const SMLoc &errLoc, const std::string &err) {
127202878Srdivacky    ErrLoc = errLoc;
128202878Srdivacky    Err = err;
129202878Srdivacky  }
130218893Sdim
131202878Srdivackypublic:
132202878Srdivacky  virtual ~MCAsmLexer();
133202878Srdivacky
134202878Srdivacky  /// Lex - Consume the next token from the input stream and return it.
135202878Srdivacky  ///
136202878Srdivacky  /// The lexer will continuosly return the end-of-file token once the end of
137202878Srdivacky  /// the main input file has been reached.
138202878Srdivacky  const AsmToken &Lex() {
139202878Srdivacky    return CurTok = LexToken();
140202878Srdivacky  }
141202878Srdivacky
142210299Sed  virtual StringRef LexUntilEndOfStatement() = 0;
143210299Sed
144210299Sed  /// getLoc - Get the current source location.
145210299Sed  SMLoc getLoc() const;
146210299Sed
147202878Srdivacky  /// getTok - Get the current (last) lexed token.
148202878Srdivacky  const AsmToken &getTok() {
149202878Srdivacky    return CurTok;
150202878Srdivacky  }
151218893Sdim
152202878Srdivacky  /// getErrLoc - Get the current error location
153202878Srdivacky  const SMLoc &getErrLoc() {
154202878Srdivacky    return ErrLoc;
155202878Srdivacky  }
156218893Sdim
157202878Srdivacky  /// getErr - Get the current error string
158202878Srdivacky  const std::string &getErr() {
159202878Srdivacky    return Err;
160202878Srdivacky  }
161202878Srdivacky
162202878Srdivacky  /// getKind - Get the kind of current token.
163202878Srdivacky  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
164202878Srdivacky
165243830Sdim  /// is - Check if the current token has kind \p K.
166202878Srdivacky  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
167202878Srdivacky
168243830Sdim  /// isNot - Check if the current token has kind \p K.
169202878Srdivacky  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
170243830Sdim
171243830Sdim  /// setSkipSpace - Set whether spaces should be ignored by the lexer
172243830Sdim  void setSkipSpace(bool val) { SkipSpace = val; }
173202878Srdivacky};
174202878Srdivacky
175202878Srdivacky} // End llvm namespace
176202878Srdivacky
177202878Srdivacky#endif
178