1202878Srdivacky//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2202878Srdivacky// 3202878Srdivacky// The LLVM Compiler Infrastructure 4202878Srdivacky// 5202878Srdivacky// This file is distributed under the University of Illinois Open Source 6202878Srdivacky// License. See LICENSE.TXT for details. 7202878Srdivacky// 8202878Srdivacky//===----------------------------------------------------------------------===// 9202878Srdivacky// 10202878Srdivacky// This class implements the lexer for assembly files. 11202878Srdivacky// 12202878Srdivacky//===----------------------------------------------------------------------===// 13202878Srdivacky 14202878Srdivacky#include "llvm/MC/MCParser/AsmLexer.h" 15252723Sdim#include "llvm/MC/MCAsmInfo.h" 16252723Sdim#include "llvm/Support/MemoryBuffer.h" 17202878Srdivacky#include "llvm/Support/SMLoc.h" 18218893Sdim#include <cctype> 19202878Srdivacky#include <cerrno> 20202878Srdivacky#include <cstdio> 21202878Srdivacky#include <cstdlib> 22202878Srdivackyusing namespace llvm; 23202878Srdivacky 24202878SrdivackyAsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { 25202878Srdivacky CurBuf = NULL; 26202878Srdivacky CurPtr = NULL; 27226890Sdim isAtStartOfLine = true; 28202878Srdivacky} 29202878Srdivacky 30202878SrdivackyAsmLexer::~AsmLexer() { 31202878Srdivacky} 32202878Srdivacky 33202878Srdivackyvoid AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { 34202878Srdivacky CurBuf = buf; 35218893Sdim 36202878Srdivacky if (ptr) 37202878Srdivacky CurPtr = ptr; 38202878Srdivacky else 39202878Srdivacky CurPtr = CurBuf->getBufferStart(); 40218893Sdim 41202878Srdivacky TokStart = 0; 42202878Srdivacky} 43202878Srdivacky 44202878Srdivacky/// ReturnError - Set the error to the specified string at the specified 45202878Srdivacky/// location. This is defined to always return AsmToken::Error. 46202878SrdivackyAsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 47202878Srdivacky SetError(SMLoc::getFromPointer(Loc), Msg); 48218893Sdim 49202878Srdivacky return AsmToken(AsmToken::Error, StringRef(Loc, 0)); 50202878Srdivacky} 51202878Srdivacky 52202878Srdivackyint AsmLexer::getNextChar() { 53202878Srdivacky char CurChar = *CurPtr++; 54202878Srdivacky switch (CurChar) { 55202878Srdivacky default: 56202878Srdivacky return (unsigned char)CurChar; 57202878Srdivacky case 0: 58202878Srdivacky // A nul character in the stream is either the end of the current buffer or 59202878Srdivacky // a random nul in the file. Disambiguate that here. 60202878Srdivacky if (CurPtr-1 != CurBuf->getBufferEnd()) 61202878Srdivacky return 0; // Just whitespace. 62218893Sdim 63202878Srdivacky // Otherwise, return end of file. 64218893Sdim --CurPtr; // Another call to lex will return EOF again. 65202878Srdivacky return EOF; 66202878Srdivacky } 67202878Srdivacky} 68202878Srdivacky 69218893Sdim/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? 70218893Sdim/// 71218893Sdim/// The leading integral digit sequence and dot should have already been 72218893Sdim/// consumed, some or all of the fractional digit sequence *can* have been 73218893Sdim/// consumed. 74218893SdimAsmToken AsmLexer::LexFloatLiteral() { 75218893Sdim // Skip the fractional digit sequence. 76218893Sdim while (isdigit(*CurPtr)) 77218893Sdim ++CurPtr; 78218893Sdim 79218893Sdim // Check for exponent; we intentionally accept a slighlty wider set of 80218893Sdim // literals here and rely on the upstream client to reject invalid ones (e.g., 81218893Sdim // "1e+"). 82218893Sdim if (*CurPtr == 'e' || *CurPtr == 'E') { 83218893Sdim ++CurPtr; 84218893Sdim if (*CurPtr == '-' || *CurPtr == '+') 85218893Sdim ++CurPtr; 86218893Sdim while (isdigit(*CurPtr)) 87218893Sdim ++CurPtr; 88218893Sdim } 89218893Sdim 90218893Sdim return AsmToken(AsmToken::Real, 91218893Sdim StringRef(TokStart, CurPtr - TokStart)); 92218893Sdim} 93218893Sdim 94263509Sdim/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ 95263509Sdim/// while making sure there are enough actual digits around for the constant to 96263509Sdim/// be valid. 97263509Sdim/// 98263509Sdim/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed 99263509Sdim/// before we get here. 100263509SdimAsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { 101263509Sdim assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && 102263509Sdim "unexpected parse state in floating hex"); 103263509Sdim bool NoFracDigits = true; 104263509Sdim 105263509Sdim // Skip the fractional part if there is one 106263509Sdim if (*CurPtr == '.') { 107263509Sdim ++CurPtr; 108263509Sdim 109263509Sdim const char *FracStart = CurPtr; 110263509Sdim while (isxdigit(*CurPtr)) 111263509Sdim ++CurPtr; 112263509Sdim 113263509Sdim NoFracDigits = CurPtr == FracStart; 114263509Sdim } 115263509Sdim 116263509Sdim if (NoIntDigits && NoFracDigits) 117263509Sdim return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 118263509Sdim "expected at least one significand digit"); 119263509Sdim 120263509Sdim // Make sure we do have some kind of proper exponent part 121263509Sdim if (*CurPtr != 'p' && *CurPtr != 'P') 122263509Sdim return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 123263509Sdim "expected exponent part 'p'"); 124263509Sdim ++CurPtr; 125263509Sdim 126263509Sdim if (*CurPtr == '+' || *CurPtr == '-') 127263509Sdim ++CurPtr; 128263509Sdim 129263509Sdim // N.b. exponent digits are *not* hex 130263509Sdim const char *ExpStart = CurPtr; 131263509Sdim while (isdigit(*CurPtr)) 132263509Sdim ++CurPtr; 133263509Sdim 134263509Sdim if (CurPtr == ExpStart) 135263509Sdim return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 136263509Sdim "expected at least one exponent digit"); 137263509Sdim 138263509Sdim return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 139263509Sdim} 140263509Sdim 141263509Sdim/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* 142218893Sdimstatic bool IsIdentifierChar(char c) { 143263509Sdim return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?'; 144218893Sdim} 145202878SrdivackyAsmToken AsmLexer::LexIdentifier() { 146218893Sdim // Check for floating point literals. 147218893Sdim if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { 148218893Sdim // Disambiguate a .1243foo identifier from a floating literal. 149218893Sdim while (isdigit(*CurPtr)) 150218893Sdim ++CurPtr; 151218893Sdim if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) 152218893Sdim return LexFloatLiteral(); 153218893Sdim } 154218893Sdim 155218893Sdim while (IsIdentifierChar(*CurPtr)) 156202878Srdivacky ++CurPtr; 157218893Sdim 158207618Srdivacky // Handle . as a special case. 159207618Srdivacky if (CurPtr == TokStart+1 && TokStart[0] == '.') 160207618Srdivacky return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 161218893Sdim 162202878Srdivacky return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 163202878Srdivacky} 164202878Srdivacky 165202878Srdivacky/// LexSlash: Slash: / 166202878Srdivacky/// C-Style Comment: /* ... */ 167202878SrdivackyAsmToken AsmLexer::LexSlash() { 168202878Srdivacky switch (*CurPtr) { 169202878Srdivacky case '*': break; // C style comment. 170202878Srdivacky case '/': return ++CurPtr, LexLineComment(); 171218893Sdim default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); 172202878Srdivacky } 173202878Srdivacky 174202878Srdivacky // C Style comment. 175202878Srdivacky ++CurPtr; // skip the star. 176202878Srdivacky while (1) { 177202878Srdivacky int CurChar = getNextChar(); 178202878Srdivacky switch (CurChar) { 179202878Srdivacky case EOF: 180202878Srdivacky return ReturnError(TokStart, "unterminated comment"); 181202878Srdivacky case '*': 182202878Srdivacky // End of the comment? 183202878Srdivacky if (CurPtr[0] != '/') break; 184218893Sdim 185202878Srdivacky ++CurPtr; // End the */. 186202878Srdivacky return LexToken(); 187202878Srdivacky } 188202878Srdivacky } 189202878Srdivacky} 190202878Srdivacky 191202878Srdivacky/// LexLineComment: Comment: #[^\n]* 192202878Srdivacky/// : //[^\n]* 193202878SrdivackyAsmToken AsmLexer::LexLineComment() { 194202878Srdivacky // FIXME: This is broken if we happen to a comment at the end of a file, which 195202878Srdivacky // was .included, and which doesn't end with a newline. 196202878Srdivacky int CurChar = getNextChar(); 197226890Sdim while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 198202878Srdivacky CurChar = getNextChar(); 199218893Sdim 200202878Srdivacky if (CurChar == EOF) 201202878Srdivacky return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); 202202878Srdivacky return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); 203202878Srdivacky} 204202878Srdivacky 205212904Sdimstatic void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 206252723Sdim // Skip ULL, UL, U, L and LL suffices. 207252723Sdim if (CurPtr[0] == 'U') 208252723Sdim ++CurPtr; 209252723Sdim if (CurPtr[0] == 'L') 210252723Sdim ++CurPtr; 211252723Sdim if (CurPtr[0] == 'L') 212252723Sdim ++CurPtr; 213212904Sdim} 214202878Srdivacky 215252723Sdim// Look ahead to search for first non-hex digit, if it's [hH], then we treat the 216252723Sdim// integer as a hexadecimal, possibly with leading zeroes. 217252723Sdimstatic unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { 218252723Sdim const char *FirstHex = 0; 219252723Sdim const char *LookAhead = CurPtr; 220252723Sdim while (1) { 221252723Sdim if (isdigit(*LookAhead)) { 222252723Sdim ++LookAhead; 223252723Sdim } else if (isxdigit(*LookAhead)) { 224252723Sdim if (!FirstHex) 225252723Sdim FirstHex = LookAhead; 226252723Sdim ++LookAhead; 227252723Sdim } else { 228252723Sdim break; 229252723Sdim } 230252723Sdim } 231252723Sdim bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; 232252723Sdim CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; 233252723Sdim if (isHex) 234252723Sdim return 16; 235252723Sdim return DefaultRadix; 236252723Sdim} 237252723Sdim 238202878Srdivacky/// LexDigit: First character is [0-9]. 239202878Srdivacky/// Local Label: [0-9][:] 240202878Srdivacky/// Forward/Backward Label: [0-9][fb] 241202878Srdivacky/// Binary integer: 0b[01]+ 242202878Srdivacky/// Octal integer: 0[0-7]+ 243252723Sdim/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 244202878Srdivacky/// Decimal integer: [1-9][0-9]* 245202878SrdivackyAsmToken AsmLexer::LexDigit() { 246202878Srdivacky // Decimal integer: [1-9][0-9]* 247218893Sdim if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 248252723Sdim unsigned Radix = doLookAhead(CurPtr, 10); 249252723Sdim bool isHex = Radix == 16; 250218893Sdim // Check for floating point literals. 251252723Sdim if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { 252218893Sdim ++CurPtr; 253218893Sdim return LexFloatLiteral(); 254218893Sdim } 255218893Sdim 256202878Srdivacky StringRef Result(TokStart, CurPtr - TokStart); 257212904Sdim 258202878Srdivacky long long Value; 259252723Sdim if (Result.getAsInteger(Radix, Value)) { 260218893Sdim // Allow positive values that are too large to fit into a signed 64-bit 261218893Sdim // integer, but that do fit in an unsigned one, we just convert them over. 262218893Sdim unsigned long long UValue; 263252723Sdim if (Result.getAsInteger(Radix, UValue)) 264252723Sdim return ReturnError(TokStart, !isHex ? "invalid decimal number" : 265252723Sdim "invalid hexdecimal number"); 266218893Sdim Value = (long long)UValue; 267205218Srdivacky } 268218893Sdim 269252723Sdim // Consume the [bB][hH]. 270252723Sdim if (Radix == 2 || Radix == 16) 271252723Sdim ++CurPtr; 272252723Sdim 273252723Sdim // The darwin/x86 (and x86-64) assembler accepts and ignores type 274252723Sdim // suffices on integer literals. 275212904Sdim SkipIgnoredIntegerSuffix(CurPtr); 276218893Sdim 277202878Srdivacky return AsmToken(AsmToken::Integer, Result, Value); 278202878Srdivacky } 279218893Sdim 280202878Srdivacky if (*CurPtr == 'b') { 281202878Srdivacky ++CurPtr; 282208599Srdivacky // See if we actually have "0b" as part of something like "jmp 0b\n" 283208599Srdivacky if (!isdigit(CurPtr[0])) { 284208599Srdivacky --CurPtr; 285208599Srdivacky StringRef Result(TokStart, CurPtr - TokStart); 286208599Srdivacky return AsmToken(AsmToken::Integer, Result, 0); 287208599Srdivacky } 288202878Srdivacky const char *NumStart = CurPtr; 289202878Srdivacky while (CurPtr[0] == '0' || CurPtr[0] == '1') 290202878Srdivacky ++CurPtr; 291218893Sdim 292202878Srdivacky // Requires at least one binary digit. 293202878Srdivacky if (CurPtr == NumStart) 294221345Sdim return ReturnError(TokStart, "invalid binary number"); 295218893Sdim 296202878Srdivacky StringRef Result(TokStart, CurPtr - TokStart); 297218893Sdim 298202878Srdivacky long long Value; 299212904Sdim if (Result.substr(2).getAsInteger(2, Value)) 300221345Sdim return ReturnError(TokStart, "invalid binary number"); 301218893Sdim 302212904Sdim // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 303212904Sdim // suffixes on integer literals. 304212904Sdim SkipIgnoredIntegerSuffix(CurPtr); 305218893Sdim 306202878Srdivacky return AsmToken(AsmToken::Integer, Result, Value); 307202878Srdivacky } 308218893Sdim 309202878Srdivacky if (*CurPtr == 'x') { 310202878Srdivacky ++CurPtr; 311202878Srdivacky const char *NumStart = CurPtr; 312202878Srdivacky while (isxdigit(CurPtr[0])) 313202878Srdivacky ++CurPtr; 314218893Sdim 315263509Sdim // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be 316263509Sdim // diagnosed by LexHexFloatLiteral). 317263509Sdim if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') 318263509Sdim return LexHexFloatLiteral(NumStart == CurPtr); 319263509Sdim 320263509Sdim // Otherwise requires at least one hex digit. 321202878Srdivacky if (CurPtr == NumStart) 322221345Sdim return ReturnError(CurPtr-2, "invalid hexadecimal number"); 323202878Srdivacky 324202878Srdivacky unsigned long long Result; 325202878Srdivacky if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 326221345Sdim return ReturnError(TokStart, "invalid hexadecimal number"); 327218893Sdim 328252723Sdim // Consume the optional [hH]. 329252723Sdim if (*CurPtr == 'h' || *CurPtr == 'H') 330252723Sdim ++CurPtr; 331252723Sdim 332212904Sdim // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 333212904Sdim // suffixes on integer literals. 334212904Sdim SkipIgnoredIntegerSuffix(CurPtr); 335218893Sdim 336202878Srdivacky return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), 337202878Srdivacky (int64_t)Result); 338202878Srdivacky } 339218893Sdim 340252723Sdim // Either octal or hexadecimal. 341252723Sdim long long Value; 342252723Sdim unsigned Radix = doLookAhead(CurPtr, 8); 343252723Sdim bool isHex = Radix == 16; 344252723Sdim StringRef Result(TokStart, CurPtr - TokStart); 345252723Sdim if (Result.getAsInteger(Radix, Value)) 346252723Sdim return ReturnError(TokStart, !isHex ? "invalid octal number" : 347252723Sdim "invalid hexdecimal number"); 348252723Sdim 349252723Sdim // Consume the [hH]. 350252723Sdim if (Radix == 16) 351202878Srdivacky ++CurPtr; 352218893Sdim 353212904Sdim // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 354212904Sdim // suffixes on integer literals. 355212904Sdim SkipIgnoredIntegerSuffix(CurPtr); 356218893Sdim 357202878Srdivacky return AsmToken(AsmToken::Integer, Result, Value); 358202878Srdivacky} 359202878Srdivacky 360218893Sdim/// LexSingleQuote: Integer: 'b' 361218893SdimAsmToken AsmLexer::LexSingleQuote() { 362218893Sdim int CurChar = getNextChar(); 363218893Sdim 364218893Sdim if (CurChar == '\\') 365218893Sdim CurChar = getNextChar(); 366218893Sdim 367218893Sdim if (CurChar == EOF) 368218893Sdim return ReturnError(TokStart, "unterminated single quote"); 369218893Sdim 370218893Sdim CurChar = getNextChar(); 371218893Sdim 372218893Sdim if (CurChar != '\'') 373218893Sdim return ReturnError(TokStart, "single quote way too long"); 374218893Sdim 375218893Sdim // The idea here being that 'c' is basically just an integral 376218893Sdim // constant. 377218893Sdim StringRef Res = StringRef(TokStart,CurPtr - TokStart); 378218893Sdim long long Value; 379218893Sdim 380218893Sdim if (Res.startswith("\'\\")) { 381218893Sdim char theChar = Res[2]; 382218893Sdim switch (theChar) { 383218893Sdim default: Value = theChar; break; 384218893Sdim case '\'': Value = '\''; break; 385218893Sdim case 't': Value = '\t'; break; 386218893Sdim case 'n': Value = '\n'; break; 387218893Sdim case 'b': Value = '\b'; break; 388218893Sdim } 389218893Sdim } else 390218893Sdim Value = TokStart[1]; 391218893Sdim 392218893Sdim return AsmToken(AsmToken::Integer, Res, Value); 393218893Sdim} 394218893Sdim 395218893Sdim 396202878Srdivacky/// LexQuote: String: "..." 397202878SrdivackyAsmToken AsmLexer::LexQuote() { 398202878Srdivacky int CurChar = getNextChar(); 399202878Srdivacky // TODO: does gas allow multiline string constants? 400202878Srdivacky while (CurChar != '"') { 401202878Srdivacky if (CurChar == '\\') { 402202878Srdivacky // Allow \", etc. 403202878Srdivacky CurChar = getNextChar(); 404202878Srdivacky } 405218893Sdim 406202878Srdivacky if (CurChar == EOF) 407202878Srdivacky return ReturnError(TokStart, "unterminated string constant"); 408202878Srdivacky 409202878Srdivacky CurChar = getNextChar(); 410202878Srdivacky } 411218893Sdim 412202878Srdivacky return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 413202878Srdivacky} 414202878Srdivacky 415202878SrdivackyStringRef AsmLexer::LexUntilEndOfStatement() { 416202878Srdivacky TokStart = CurPtr; 417202878Srdivacky 418221345Sdim while (!isAtStartOfComment(*CurPtr) && // Start of line comment. 419221345Sdim !isAtStatementSeparator(CurPtr) && // End of statement marker. 420202878Srdivacky *CurPtr != '\n' && 421202878Srdivacky *CurPtr != '\r' && 422202878Srdivacky (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 423202878Srdivacky ++CurPtr; 424202878Srdivacky } 425202878Srdivacky return StringRef(TokStart, CurPtr-TokStart); 426202878Srdivacky} 427202878Srdivacky 428226890SdimStringRef AsmLexer::LexUntilEndOfLine() { 429226890Sdim TokStart = CurPtr; 430226890Sdim 431226890Sdim while (*CurPtr != '\n' && 432226890Sdim *CurPtr != '\r' && 433226890Sdim (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { 434226890Sdim ++CurPtr; 435226890Sdim } 436226890Sdim return StringRef(TokStart, CurPtr-TokStart); 437226890Sdim} 438226890Sdim 439202878Srdivackybool AsmLexer::isAtStartOfComment(char Char) { 440202878Srdivacky // FIXME: This won't work for multi-character comment indicators like "//". 441202878Srdivacky return Char == *MAI.getCommentString(); 442202878Srdivacky} 443202878Srdivacky 444221345Sdimbool AsmLexer::isAtStatementSeparator(const char *Ptr) { 445221345Sdim return strncmp(Ptr, MAI.getSeparatorString(), 446221345Sdim strlen(MAI.getSeparatorString())) == 0; 447221345Sdim} 448221345Sdim 449202878SrdivackyAsmToken AsmLexer::LexToken() { 450202878Srdivacky TokStart = CurPtr; 451202878Srdivacky // This always consumes at least one character. 452202878Srdivacky int CurChar = getNextChar(); 453218893Sdim 454226890Sdim if (isAtStartOfComment(CurChar)) { 455226890Sdim // If this comment starts with a '#', then return the Hash token and let 456226890Sdim // the assembler parser see if it can be parsed as a cpp line filename 457226890Sdim // comment. We do this only if we are at the start of a line. 458226890Sdim if (CurChar == '#' && isAtStartOfLine) 459226890Sdim return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 460226890Sdim isAtStartOfLine = true; 461202878Srdivacky return LexLineComment(); 462226890Sdim } 463221345Sdim if (isAtStatementSeparator(TokStart)) { 464221345Sdim CurPtr += strlen(MAI.getSeparatorString()) - 1; 465221345Sdim return AsmToken(AsmToken::EndOfStatement, 466221345Sdim StringRef(TokStart, strlen(MAI.getSeparatorString()))); 467221345Sdim } 468202878Srdivacky 469226890Sdim // If we're missing a newline at EOF, make sure we still get an 470226890Sdim // EndOfStatement token before the Eof token. 471226890Sdim if (CurChar == EOF && !isAtStartOfLine) { 472226890Sdim isAtStartOfLine = true; 473226890Sdim return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 474226890Sdim } 475226890Sdim 476226890Sdim isAtStartOfLine = false; 477202878Srdivacky switch (CurChar) { 478202878Srdivacky default: 479202878Srdivacky // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 480202878Srdivacky if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 481202878Srdivacky return LexIdentifier(); 482218893Sdim 483202878Srdivacky // Unknown character, emit an error. 484202878Srdivacky return ReturnError(TokStart, "invalid character in input"); 485202878Srdivacky case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 486202878Srdivacky case 0: 487202878Srdivacky case ' ': 488202878Srdivacky case '\t': 489245431Sdim if (SkipSpace) { 490245431Sdim // Ignore whitespace. 491245431Sdim return LexToken(); 492245431Sdim } else { 493245431Sdim int len = 1; 494245431Sdim while (*CurPtr==' ' || *CurPtr=='\t') { 495245431Sdim CurPtr++; 496245431Sdim len++; 497245431Sdim } 498245431Sdim return AsmToken(AsmToken::Space, StringRef(TokStart, len)); 499245431Sdim } 500202878Srdivacky case '\n': // FALL THROUGH. 501221345Sdim case '\r': 502226890Sdim isAtStartOfLine = true; 503221345Sdim return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 504202878Srdivacky case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 505202878Srdivacky case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 506202878Srdivacky case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 507202878Srdivacky case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 508202878Srdivacky case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 509202878Srdivacky case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 510202878Srdivacky case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 511202878Srdivacky case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 512202878Srdivacky case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 513202878Srdivacky case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 514202878Srdivacky case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 515202878Srdivacky case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 516202878Srdivacky case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 517208599Srdivacky case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 518223017Sdim case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 519218893Sdim case '=': 520202878Srdivacky if (*CurPtr == '=') 521202878Srdivacky return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 522202878Srdivacky return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 523218893Sdim case '|': 524202878Srdivacky if (*CurPtr == '|') 525202878Srdivacky return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 526202878Srdivacky return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 527202878Srdivacky case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 528218893Sdim case '&': 529202878Srdivacky if (*CurPtr == '&') 530202878Srdivacky return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 531202878Srdivacky return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 532218893Sdim case '!': 533202878Srdivacky if (*CurPtr == '=') 534202878Srdivacky return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 535202878Srdivacky return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 536202878Srdivacky case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 537202878Srdivacky case '/': return LexSlash(); 538202878Srdivacky case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 539218893Sdim case '\'': return LexSingleQuote(); 540202878Srdivacky case '"': return LexQuote(); 541202878Srdivacky case '0': case '1': case '2': case '3': case '4': 542202878Srdivacky case '5': case '6': case '7': case '8': case '9': 543202878Srdivacky return LexDigit(); 544202878Srdivacky case '<': 545202878Srdivacky switch (*CurPtr) { 546218893Sdim case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, 547202878Srdivacky StringRef(TokStart, 2)); 548218893Sdim case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, 549202878Srdivacky StringRef(TokStart, 2)); 550218893Sdim case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, 551202878Srdivacky StringRef(TokStart, 2)); 552202878Srdivacky default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 553202878Srdivacky } 554202878Srdivacky case '>': 555202878Srdivacky switch (*CurPtr) { 556218893Sdim case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, 557202878Srdivacky StringRef(TokStart, 2)); 558218893Sdim case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, 559202878Srdivacky StringRef(TokStart, 2)); 560202878Srdivacky default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 561202878Srdivacky } 562218893Sdim 563202878Srdivacky // TODO: Quoted identifiers (objc methods etc) 564202878Srdivacky // local labels: [0-9][:] 565202878Srdivacky // Forward/backward labels: [0-9][fb] 566202878Srdivacky // Integers, fp constants, character constants. 567202878Srdivacky } 568202878Srdivacky} 569