1202878Srdivacky//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2202878Srdivacky// 3202878Srdivacky// The LLVM Compiler Infrastructure 4202878Srdivacky// 5202878Srdivacky// This file is distributed under the University of Illinois Open Source 6202878Srdivacky// License. See LICENSE.TXT for details. 7202878Srdivacky// 8202878Srdivacky//===----------------------------------------------------------------------===// 9202878Srdivacky 10249423Sdim#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11249423Sdim#define LLVM_MC_MCPARSER_MCASMLEXER_H 12202878Srdivacky 13202878Srdivacky#include "llvm/ADT/StringRef.h" 14243830Sdim#include "llvm/Support/Compiler.h" 15218893Sdim#include "llvm/Support/DataTypes.h" 16202878Srdivacky#include "llvm/Support/SMLoc.h" 17202878Srdivacky 18202878Srdivackynamespace llvm { 19202878Srdivacky 20202878Srdivacky/// AsmToken - Target independent representation for an assembler token. 21202878Srdivackyclass AsmToken { 22202878Srdivackypublic: 23202878Srdivacky enum TokenKind { 24202878Srdivacky // Markers 25202878Srdivacky Eof, Error, 26202878Srdivacky 27202878Srdivacky // String values. 28202878Srdivacky Identifier, 29202878Srdivacky String, 30218893Sdim 31202878Srdivacky // Integer values. 32202878Srdivacky Integer, 33218893Sdim 34218893Sdim // Real values. 35218893Sdim Real, 36218893Sdim 37202878Srdivacky // No-value. 38202878Srdivacky EndOfStatement, 39202878Srdivacky Colon, 40243830Sdim Space, 41202878Srdivacky Plus, Minus, Tilde, 42202878Srdivacky Slash, // '/' 43223017Sdim BackSlash, // '\' 44202878Srdivacky LParen, RParen, LBrac, RBrac, LCurly, RCurly, 45207618Srdivacky Star, Dot, Comma, Dollar, Equal, EqualEqual, 46218893Sdim 47218893Sdim Pipe, PipePipe, Caret, 48202878Srdivacky Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 49202878Srdivacky Less, LessEqual, LessLess, LessGreater, 50208599Srdivacky Greater, GreaterEqual, GreaterGreater, At 51202878Srdivacky }; 52202878Srdivacky 53234982Sdimprivate: 54202878Srdivacky TokenKind Kind; 55202878Srdivacky 56202878Srdivacky /// A reference to the entire token contents; this is always a pointer into 57202878Srdivacky /// a memory buffer owned by the source manager. 58202878Srdivacky StringRef Str; 59202878Srdivacky 60202878Srdivacky int64_t IntVal; 61202878Srdivacky 62202878Srdivackypublic: 63202878Srdivacky AsmToken() {} 64202878Srdivacky AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) 65202878Srdivacky : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 66202878Srdivacky 67202878Srdivacky TokenKind getKind() const { return Kind; } 68202878Srdivacky bool is(TokenKind K) const { return Kind == K; } 69202878Srdivacky bool isNot(TokenKind K) const { return Kind != K; } 70202878Srdivacky 71202878Srdivacky SMLoc getLoc() const; 72234353Sdim SMLoc getEndLoc() const; 73202878Srdivacky 74202878Srdivacky /// getStringContents - Get the contents of a string token (without quotes). 75218893Sdim StringRef getStringContents() const { 76202878Srdivacky assert(Kind == String && "This token isn't a string!"); 77202878Srdivacky return Str.slice(1, Str.size() - 1); 78202878Srdivacky } 79202878Srdivacky 80202878Srdivacky /// getIdentifier - Get the identifier string for the current token, which 81202878Srdivacky /// should be an identifier or a string. This gets the portion of the string 82202878Srdivacky /// which should be used as the identifier, e.g., it does not include the 83202878Srdivacky /// quotes on strings. 84202878Srdivacky StringRef getIdentifier() const { 85202878Srdivacky if (Kind == Identifier) 86202878Srdivacky return getString(); 87202878Srdivacky return getStringContents(); 88202878Srdivacky } 89202878Srdivacky 90202878Srdivacky /// getString - Get the string for the current token, this includes all 91202878Srdivacky /// characters (for example, the quotes on strings) in the token. 92202878Srdivacky /// 93202878Srdivacky /// The returned StringRef points into the source manager's memory buffer, and 94202878Srdivacky /// is safe to store across calls to Lex(). 95202878Srdivacky StringRef getString() const { return Str; } 96202878Srdivacky 97202878Srdivacky // FIXME: Don't compute this in advance, it makes every token larger, and is 98202878Srdivacky // also not generally what we want (it is nicer for recovery etc. to lex 123br 99202878Srdivacky // as a single token, then diagnose as an invalid number). 100218893Sdim int64_t getIntVal() const { 101202878Srdivacky assert(Kind == Integer && "This token isn't an integer!"); 102218893Sdim return IntVal; 103202878Srdivacky } 104202878Srdivacky}; 105202878Srdivacky 106202878Srdivacky/// MCAsmLexer - Generic assembler lexer interface, for use by target specific 107202878Srdivacky/// assembly lexers. 108202878Srdivackyclass MCAsmLexer { 109202878Srdivacky /// The current token, stored in the base class for faster access. 110202878Srdivacky AsmToken CurTok; 111218893Sdim 112202878Srdivacky /// The location and description of the current error 113202878Srdivacky SMLoc ErrLoc; 114202878Srdivacky std::string Err; 115202878Srdivacky 116243830Sdim MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 117243830Sdim void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 118202878Srdivackyprotected: // Can only create subclasses. 119210299Sed const char *TokStart; 120243830Sdim bool SkipSpace; 121210299Sed 122202878Srdivacky MCAsmLexer(); 123202878Srdivacky 124202878Srdivacky virtual AsmToken LexToken() = 0; 125218893Sdim 126202878Srdivacky void SetError(const SMLoc &errLoc, const std::string &err) { 127202878Srdivacky ErrLoc = errLoc; 128202878Srdivacky Err = err; 129202878Srdivacky } 130218893Sdim 131202878Srdivackypublic: 132202878Srdivacky virtual ~MCAsmLexer(); 133202878Srdivacky 134202878Srdivacky /// Lex - Consume the next token from the input stream and return it. 135202878Srdivacky /// 136202878Srdivacky /// The lexer will continuosly return the end-of-file token once the end of 137202878Srdivacky /// the main input file has been reached. 138202878Srdivacky const AsmToken &Lex() { 139202878Srdivacky return CurTok = LexToken(); 140202878Srdivacky } 141202878Srdivacky 142210299Sed virtual StringRef LexUntilEndOfStatement() = 0; 143210299Sed 144210299Sed /// getLoc - Get the current source location. 145210299Sed SMLoc getLoc() const; 146210299Sed 147202878Srdivacky /// getTok - Get the current (last) lexed token. 148202878Srdivacky const AsmToken &getTok() { 149202878Srdivacky return CurTok; 150202878Srdivacky } 151218893Sdim 152202878Srdivacky /// getErrLoc - Get the current error location 153202878Srdivacky const SMLoc &getErrLoc() { 154202878Srdivacky return ErrLoc; 155202878Srdivacky } 156218893Sdim 157202878Srdivacky /// getErr - Get the current error string 158202878Srdivacky const std::string &getErr() { 159202878Srdivacky return Err; 160202878Srdivacky } 161202878Srdivacky 162202878Srdivacky /// getKind - Get the kind of current token. 163202878Srdivacky AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 164202878Srdivacky 165243830Sdim /// is - Check if the current token has kind \p K. 166202878Srdivacky bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 167202878Srdivacky 168243830Sdim /// isNot - Check if the current token has kind \p K. 169202878Srdivacky bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 170243830Sdim 171243830Sdim /// setSkipSpace - Set whether spaces should be ignored by the lexer 172243830Sdim void setSkipSpace(bool val) { SkipSpace = val; } 173202878Srdivacky}; 174202878Srdivacky 175202878Srdivacky} // End llvm namespace 176202878Srdivacky 177202878Srdivacky#endif 178