//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implement the Lexer for .ll files.
//
//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#include "LLLexer.h"
15252723Sdim#include "llvm/ADT/StringExtras.h"
16218893Sdim#include "llvm/ADT/Twine.h"
17218893Sdim#include "llvm/Assembly/Parser.h"
18252723Sdim#include "llvm/IR/DerivedTypes.h"
19252723Sdim#include "llvm/IR/Instruction.h"
20252723Sdim#include "llvm/IR/LLVMContext.h"
21198090Srdivacky#include "llvm/Support/ErrorHandling.h"
22252723Sdim#include "llvm/Support/MathExtras.h"
23193323Sed#include "llvm/Support/MemoryBuffer.h"
24195340Sed#include "llvm/Support/SourceMgr.h"
25193323Sed#include "llvm/Support/raw_ostream.h"
26218893Sdim#include <cctype>
27198090Srdivacky#include <cstdio>
28193323Sed#include <cstdlib>
29193323Sed#include <cstring>
30193323Sedusing namespace llvm;
31193323Sed
32218893Sdimbool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
33235633Sdim  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
34193323Sed  return true;
35193323Sed}
36193323Sed
//===----------------------------------------------------------------------===//
// Helper functions.
//===----------------------------------------------------------------------===//
40193323Sed
41193323Sed// atoull - Convert an ascii string of decimal digits into the unsigned long
42193323Sed// long representation... this does not have to do input error checking,
43193323Sed// because we know that the input will be matched by a suitable regex...
44193323Sed//
45193323Seduint64_t LLLexer::atoull(const char *Buffer, const char *End) {
46193323Sed  uint64_t Result = 0;
47193323Sed  for (; Buffer != End; Buffer++) {
48193323Sed    uint64_t OldRes = Result;
49193323Sed    Result *= 10;
50193323Sed    Result += *Buffer-'0';
51193323Sed    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
52193323Sed      Error("constant bigger than 64 bits detected!");
53193323Sed      return 0;
54193323Sed    }
55193323Sed  }
56193323Sed  return Result;
57193323Sed}
58193323Sed
59193323Seduint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
60193323Sed  uint64_t Result = 0;
61193323Sed  for (; Buffer != End; ++Buffer) {
62193323Sed    uint64_t OldRes = Result;
63193323Sed    Result *= 16;
64252723Sdim    Result += hexDigitValue(*Buffer);
65193323Sed
66193323Sed    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
67193323Sed      Error("constant bigger than 64 bits detected!");
68193323Sed      return 0;
69193323Sed    }
70193323Sed  }
71193323Sed  return Result;
72193323Sed}
73193323Sed
74193323Sedvoid LLLexer::HexToIntPair(const char *Buffer, const char *End,
75193323Sed                           uint64_t Pair[2]) {
76193323Sed  Pair[0] = 0;
77193323Sed  for (int i=0; i<16; i++, Buffer++) {
78193323Sed    assert(Buffer != End);
79193323Sed    Pair[0] *= 16;
80252723Sdim    Pair[0] += hexDigitValue(*Buffer);
81193323Sed  }
82193323Sed  Pair[1] = 0;
83193323Sed  for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
84193323Sed    Pair[1] *= 16;
85252723Sdim    Pair[1] += hexDigitValue(*Buffer);
86193323Sed  }
87193323Sed  if (Buffer != End)
88193323Sed    Error("constant bigger than 128 bits detected!");
89193323Sed}
90193323Sed
91193323Sed/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
92193323Sed/// { low64, high16 } as usual for an APInt.
93193323Sedvoid LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
94193323Sed                           uint64_t Pair[2]) {
95193323Sed  Pair[1] = 0;
96193323Sed  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
97193323Sed    assert(Buffer != End);
98193323Sed    Pair[1] *= 16;
99252723Sdim    Pair[1] += hexDigitValue(*Buffer);
100193323Sed  }
101193323Sed  Pair[0] = 0;
102193323Sed  for (int i=0; i<16; i++, Buffer++) {
103193323Sed    Pair[0] *= 16;
104252723Sdim    Pair[0] += hexDigitValue(*Buffer);
105193323Sed  }
106193323Sed  if (Buffer != End)
107193323Sed    Error("constant bigger than 128 bits detected!");
108193323Sed}
109193323Sed
110193323Sed// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
111193323Sed// appropriate character.
112193323Sedstatic void UnEscapeLexed(std::string &Str) {
113193323Sed  if (Str.empty()) return;
114193323Sed
115193323Sed  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
116193323Sed  char *BOut = Buffer;
117193323Sed  for (char *BIn = Buffer; BIn != EndBuffer; ) {
118193323Sed    if (BIn[0] == '\\') {
119193323Sed      if (BIn < EndBuffer-1 && BIn[1] == '\\') {
120193323Sed        *BOut++ = '\\'; // Two \ becomes one
121193323Sed        BIn += 2;
122252723Sdim      } else if (BIn < EndBuffer-2 &&
123252723Sdim                 isxdigit(static_cast<unsigned char>(BIn[1])) &&
124252723Sdim                 isxdigit(static_cast<unsigned char>(BIn[2]))) {
125252723Sdim        *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
126193323Sed        BIn += 3;                           // Skip over handled chars
127193323Sed        ++BOut;
128193323Sed      } else {
129193323Sed        *BOut++ = *BIn++;
130193323Sed      }
131193323Sed    } else {
132193323Sed      *BOut++ = *BIn++;
133193323Sed    }
134193323Sed  }
135193323Sed  Str.resize(BOut-Buffer);
136193323Sed}
137193323Sed
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Cast through unsigned char so negative char values are safe to pass.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
143193323Sed
144193323Sed
145193323Sed/// isLabelTail - Return true if this pointer points to a valid end of a label.
146193323Sedstatic const char *isLabelTail(const char *CurPtr) {
147193323Sed  while (1) {
148193323Sed    if (CurPtr[0] == ':') return CurPtr+1;
149193323Sed    if (!isLabelChar(CurPtr[0])) return 0;
150193323Sed    ++CurPtr;
151193323Sed  }
152193323Sed}
153193323Sed
154193323Sed
155193323Sed
//===----------------------------------------------------------------------===//
// Lexer definition.
//===----------------------------------------------------------------------===//
159193323Sed
160198090SrdivackyLLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
161198090Srdivacky                 LLVMContext &C)
162198090Srdivacky  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
163193323Sed  CurPtr = CurBuf->getBufferStart();
164193323Sed}
165193323Sed
166193323Sedstd::string LLLexer::getFilename() const {
167193323Sed  return CurBuf->getBufferIdentifier();
168193323Sed}
169193323Sed
170193323Sedint LLLexer::getNextChar() {
171193323Sed  char CurChar = *CurPtr++;
172193323Sed  switch (CurChar) {
173193323Sed  default: return (unsigned char)CurChar;
174193323Sed  case 0:
175193323Sed    // A nul character in the stream is either the end of the current buffer or
176193323Sed    // a random nul in the file.  Disambiguate that here.
177193323Sed    if (CurPtr-1 != CurBuf->getBufferEnd())
178193323Sed      return 0;  // Just whitespace.
179193323Sed
180193323Sed    // Otherwise, return end of file.
181193323Sed    --CurPtr;  // Another call to lex will return EOF again.
182193323Sed    return EOF;
183193323Sed  }
184193323Sed}
185193323Sed
186193323Sed
187193323Sedlltok::Kind LLLexer::LexToken() {
188193323Sed  TokStart = CurPtr;
189193323Sed
190193323Sed  int CurChar = getNextChar();
191193323Sed  switch (CurChar) {
192193323Sed  default:
193193323Sed    // Handle letters: [a-zA-Z_]
194252723Sdim    if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
195193323Sed      return LexIdentifier();
196193323Sed
197193323Sed    return lltok::Error;
198193323Sed  case EOF: return lltok::Eof;
199193323Sed  case 0:
200193323Sed  case ' ':
201193323Sed  case '\t':
202193323Sed  case '\n':
203193323Sed  case '\r':
204193323Sed    // Ignore whitespace.
205193323Sed    return LexToken();
206193323Sed  case '+': return LexPositive();
207193323Sed  case '@': return LexAt();
208193323Sed  case '%': return LexPercent();
209193323Sed  case '"': return LexQuote();
210193323Sed  case '.':
211193323Sed    if (const char *Ptr = isLabelTail(CurPtr)) {
212193323Sed      CurPtr = Ptr;
213193323Sed      StrVal.assign(TokStart, CurPtr-1);
214193323Sed      return lltok::LabelStr;
215193323Sed    }
216193323Sed    if (CurPtr[0] == '.' && CurPtr[1] == '.') {
217193323Sed      CurPtr += 2;
218193323Sed      return lltok::dotdotdot;
219193323Sed    }
220193323Sed    return lltok::Error;
221193323Sed  case '$':
222193323Sed    if (const char *Ptr = isLabelTail(CurPtr)) {
223193323Sed      CurPtr = Ptr;
224193323Sed      StrVal.assign(TokStart, CurPtr-1);
225193323Sed      return lltok::LabelStr;
226193323Sed    }
227193323Sed    return lltok::Error;
228193323Sed  case ';':
229193323Sed    SkipLineComment();
230193323Sed    return LexToken();
231201360Srdivacky  case '!': return LexExclaim();
232252723Sdim  case '#': return LexHash();
233193323Sed  case '0': case '1': case '2': case '3': case '4':
234193323Sed  case '5': case '6': case '7': case '8': case '9':
235193323Sed  case '-':
236193323Sed    return LexDigitOrNegative();
237193323Sed  case '=': return lltok::equal;
238193323Sed  case '[': return lltok::lsquare;
239193323Sed  case ']': return lltok::rsquare;
240193323Sed  case '{': return lltok::lbrace;
241193323Sed  case '}': return lltok::rbrace;
242193323Sed  case '<': return lltok::less;
243193323Sed  case '>': return lltok::greater;
244193323Sed  case '(': return lltok::lparen;
245193323Sed  case ')': return lltok::rparen;
246193323Sed  case ',': return lltok::comma;
247193323Sed  case '*': return lltok::star;
248193323Sed  case '\\': return lltok::backslash;
249193323Sed  }
250193323Sed}
251193323Sed
252193323Sedvoid LLLexer::SkipLineComment() {
253193323Sed  while (1) {
254193323Sed    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
255193323Sed      return;
256193323Sed  }
257193323Sed}
258193323Sed
259193323Sed/// LexAt - Lex all tokens that start with an @ character:
260193323Sed///   GlobalVar   @\"[^\"]*\"
261193323Sed///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
262193323Sed///   GlobalVarID @[0-9]+
263193323Sedlltok::Kind LLLexer::LexAt() {
264193323Sed  // Handle AtStringConstant: @\"[^\"]*\"
265193323Sed  if (CurPtr[0] == '"') {
266193323Sed    ++CurPtr;
267193323Sed
268193323Sed    while (1) {
269193323Sed      int CurChar = getNextChar();
270193323Sed
271193323Sed      if (CurChar == EOF) {
272193323Sed        Error("end of file in global variable name");
273193323Sed        return lltok::Error;
274193323Sed      }
275193323Sed      if (CurChar == '"') {
276193323Sed        StrVal.assign(TokStart+2, CurPtr-1);
277193323Sed        UnEscapeLexed(StrVal);
278193323Sed        return lltok::GlobalVar;
279193323Sed      }
280193323Sed    }
281193323Sed  }
282193323Sed
283193323Sed  // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
284223017Sdim  if (ReadVarName())
285193323Sed    return lltok::GlobalVar;
286193323Sed
287193323Sed  // Handle GlobalVarID: @[0-9]+
288252723Sdim  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
289252723Sdim    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
290193323Sed      /*empty*/;
291193323Sed
292193323Sed    uint64_t Val = atoull(TokStart+1, CurPtr);
293193323Sed    if ((unsigned)Val != Val)
294193323Sed      Error("invalid value number (too large)!");
295193323Sed    UIntVal = unsigned(Val);
296193323Sed    return lltok::GlobalID;
297193323Sed  }
298193323Sed
299193323Sed  return lltok::Error;
300193323Sed}
301193323Sed
302223017Sdim/// ReadString - Read a string until the closing quote.
303223017Sdimlltok::Kind LLLexer::ReadString(lltok::Kind kind) {
304223017Sdim  const char *Start = CurPtr;
305223017Sdim  while (1) {
306223017Sdim    int CurChar = getNextChar();
307193323Sed
308223017Sdim    if (CurChar == EOF) {
309223017Sdim      Error("end of file in string constant");
310223017Sdim      return lltok::Error;
311223017Sdim    }
312223017Sdim    if (CurChar == '"') {
313223017Sdim      StrVal.assign(Start, CurPtr-1);
314223017Sdim      UnEscapeLexed(StrVal);
315223017Sdim      return kind;
316223017Sdim    }
317223017Sdim  }
318223017Sdim}
319223017Sdim
320223017Sdim/// ReadVarName - Read the rest of a token containing a variable name.
321223017Sdimbool LLLexer::ReadVarName() {
322223017Sdim  const char *NameStart = CurPtr;
323252723Sdim  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
324252723Sdim      CurPtr[0] == '-' || CurPtr[0] == '$' ||
325223017Sdim      CurPtr[0] == '.' || CurPtr[0] == '_') {
326223017Sdim    ++CurPtr;
327252723Sdim    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
328252723Sdim           CurPtr[0] == '-' || CurPtr[0] == '$' ||
329223017Sdim           CurPtr[0] == '.' || CurPtr[0] == '_')
330223017Sdim      ++CurPtr;
331223017Sdim
332223017Sdim    StrVal.assign(NameStart, CurPtr);
333223017Sdim    return true;
334223017Sdim  }
335223017Sdim  return false;
336223017Sdim}
337223017Sdim
338193323Sed/// LexPercent - Lex all tokens that start with a % character:
339193323Sed///   LocalVar   ::= %\"[^\"]*\"
340193323Sed///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
341193323Sed///   LocalVarID ::= %[0-9]+
342193323Sedlltok::Kind LLLexer::LexPercent() {
343193323Sed  // Handle LocalVarName: %\"[^\"]*\"
344193323Sed  if (CurPtr[0] == '"') {
345193323Sed    ++CurPtr;
346223017Sdim    return ReadString(lltok::LocalVar);
347193323Sed  }
348193323Sed
349193323Sed  // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
350223017Sdim  if (ReadVarName())
351193323Sed    return lltok::LocalVar;
352193323Sed
353193323Sed  // Handle LocalVarID: %[0-9]+
354252723Sdim  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
355252723Sdim    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
356193323Sed      /*empty*/;
357193323Sed
358193323Sed    uint64_t Val = atoull(TokStart+1, CurPtr);
359193323Sed    if ((unsigned)Val != Val)
360193323Sed      Error("invalid value number (too large)!");
361193323Sed    UIntVal = unsigned(Val);
362193323Sed    return lltok::LocalVarID;
363193323Sed  }
364193323Sed
365193323Sed  return lltok::Error;
366193323Sed}
367193323Sed
368193323Sed/// LexQuote - Lex all tokens that start with a " character:
369193323Sed///   QuoteLabel        "[^"]+":
370193323Sed///   StringConstant    "[^"]*"
371193323Sedlltok::Kind LLLexer::LexQuote() {
372223017Sdim  lltok::Kind kind = ReadString(lltok::StringConstant);
373223017Sdim  if (kind == lltok::Error || kind == lltok::Eof)
374223017Sdim    return kind;
375193323Sed
376223017Sdim  if (CurPtr[0] == ':') {
377193323Sed    ++CurPtr;
378223017Sdim    kind = lltok::LabelStr;
379193323Sed  }
380223017Sdim
381223017Sdim  return kind;
382193323Sed}
383193323Sed
384201360Srdivacky/// LexExclaim:
385198090Srdivacky///    !foo
386201360Srdivacky///    !
387201360Srdivackylltok::Kind LLLexer::LexExclaim() {
388201360Srdivacky  // Lex a metadata name as a MetadataVar.
389252723Sdim  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
390252723Sdim      CurPtr[0] == '-' || CurPtr[0] == '$' ||
391224145Sdim      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
392198090Srdivacky    ++CurPtr;
393252723Sdim    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
394252723Sdim           CurPtr[0] == '-' || CurPtr[0] == '$' ||
395224145Sdim           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
396198090Srdivacky      ++CurPtr;
397193323Sed
398198090Srdivacky    StrVal.assign(TokStart+1, CurPtr);   // Skip !
399224145Sdim    UnEscapeLexed(StrVal);
400201360Srdivacky    return lltok::MetadataVar;
401198090Srdivacky  }
402201360Srdivacky  return lltok::exclaim;
403198090Srdivacky}
404252723Sdim
405252723Sdim/// LexHash - Lex all tokens that start with a # character:
406252723Sdim///    AttrGrpID ::= #[0-9]+
407252723Sdimlltok::Kind LLLexer::LexHash() {
408252723Sdim  // Handle AttrGrpID: #[0-9]+
409252723Sdim  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
410252723Sdim    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
411252723Sdim      /*empty*/;
412252723Sdim
413252723Sdim    uint64_t Val = atoull(TokStart+1, CurPtr);
414252723Sdim    if ((unsigned)Val != Val)
415252723Sdim      Error("invalid value number (too large)!");
416252723Sdim    UIntVal = unsigned(Val);
417252723Sdim    return lltok::AttrGrpID;
418252723Sdim  }
419252723Sdim
420252723Sdim  return lltok::Error;
421252723Sdim}
422252723Sdim
423193323Sed/// LexIdentifier: Handle several related productions:
424193323Sed///    Label           [-a-zA-Z$._0-9]+:
425193323Sed///    IntegerType     i[0-9]+
426193323Sed///    Keyword         sdiv, float, ...
427193323Sed///    HexIntConstant  [us]0x[0-9A-Fa-f]+
428193323Sedlltok::Kind LLLexer::LexIdentifier() {
429193323Sed  const char *StartChar = CurPtr;
430193323Sed  const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
431193323Sed  const char *KeywordEnd = 0;
432193323Sed
433193323Sed  for (; isLabelChar(*CurPtr); ++CurPtr) {
434193323Sed    // If we decide this is an integer, remember the end of the sequence.
435252723Sdim    if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
436252723Sdim      IntEnd = CurPtr;
437252723Sdim    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
438252723Sdim        *CurPtr != '_')
439252723Sdim      KeywordEnd = CurPtr;
440193323Sed  }
441193323Sed
442193323Sed  // If we stopped due to a colon, this really is a label.
443193323Sed  if (*CurPtr == ':') {
444193323Sed    StrVal.assign(StartChar-1, CurPtr++);
445193323Sed    return lltok::LabelStr;
446193323Sed  }
447193323Sed
448193323Sed  // Otherwise, this wasn't a label.  If this was valid as an integer type,
449193323Sed  // return it.
450193323Sed  if (IntEnd == 0) IntEnd = CurPtr;
451193323Sed  if (IntEnd != StartChar) {
452193323Sed    CurPtr = IntEnd;
453193323Sed    uint64_t NumBits = atoull(StartChar, CurPtr);
454193323Sed    if (NumBits < IntegerType::MIN_INT_BITS ||
455193323Sed        NumBits > IntegerType::MAX_INT_BITS) {
456193323Sed      Error("bitwidth for integer type out of range!");
457193323Sed      return lltok::Error;
458193323Sed    }
459198090Srdivacky    TyVal = IntegerType::get(Context, NumBits);
460193323Sed    return lltok::Type;
461193323Sed  }
462193323Sed
463193323Sed  // Otherwise, this was a letter sequence.  See which keyword this is.
464193323Sed  if (KeywordEnd == 0) KeywordEnd = CurPtr;
465193323Sed  CurPtr = KeywordEnd;
466193323Sed  --StartChar;
467193323Sed  unsigned Len = CurPtr-StartChar;
468252723Sdim#define KEYWORD(STR)                                                    \
469252723Sdim  do {                                                                  \
470252723Sdim    if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR)))  \
471252723Sdim      return lltok::kw_##STR;                                           \
472252723Sdim  } while (0)
473193323Sed
474193323Sed  KEYWORD(true);    KEYWORD(false);
475193323Sed  KEYWORD(declare); KEYWORD(define);
476193323Sed  KEYWORD(global);  KEYWORD(constant);
477193323Sed
478193323Sed  KEYWORD(private);
479198090Srdivacky  KEYWORD(linker_private);
480210299Sed  KEYWORD(linker_private_weak);
481193323Sed  KEYWORD(internal);
482193323Sed  KEYWORD(available_externally);
483193323Sed  KEYWORD(linkonce);
484193323Sed  KEYWORD(linkonce_odr);
485193323Sed  KEYWORD(weak);
486193323Sed  KEYWORD(weak_odr);
487193323Sed  KEYWORD(appending);
488193323Sed  KEYWORD(dllimport);
489193323Sed  KEYWORD(dllexport);
490193323Sed  KEYWORD(common);
491193323Sed  KEYWORD(default);
492193323Sed  KEYWORD(hidden);
493193323Sed  KEYWORD(protected);
494218893Sdim  KEYWORD(unnamed_addr);
495252723Sdim  KEYWORD(externally_initialized);
496193323Sed  KEYWORD(extern_weak);
497193323Sed  KEYWORD(external);
498193323Sed  KEYWORD(thread_local);
499245431Sdim  KEYWORD(localdynamic);
500245431Sdim  KEYWORD(initialexec);
501245431Sdim  KEYWORD(localexec);
502193323Sed  KEYWORD(zeroinitializer);
503193323Sed  KEYWORD(undef);
504193323Sed  KEYWORD(null);
505193323Sed  KEYWORD(to);
506193323Sed  KEYWORD(tail);
507193323Sed  KEYWORD(target);
508193323Sed  KEYWORD(triple);
509235633Sdim  KEYWORD(unwind);
510252723Sdim  KEYWORD(deplibs);             // FIXME: Remove in 4.0.
511193323Sed  KEYWORD(datalayout);
512193323Sed  KEYWORD(volatile);
513226890Sdim  KEYWORD(atomic);
514226890Sdim  KEYWORD(unordered);
515226890Sdim  KEYWORD(monotonic);
516226890Sdim  KEYWORD(acquire);
517226890Sdim  KEYWORD(release);
518226890Sdim  KEYWORD(acq_rel);
519226890Sdim  KEYWORD(seq_cst);
520226890Sdim  KEYWORD(singlethread);
521226890Sdim
522252723Sdim  KEYWORD(nnan);
523252723Sdim  KEYWORD(ninf);
524252723Sdim  KEYWORD(nsz);
525252723Sdim  KEYWORD(arcp);
526252723Sdim  KEYWORD(fast);
527198090Srdivacky  KEYWORD(nuw);
528198090Srdivacky  KEYWORD(nsw);
529198090Srdivacky  KEYWORD(exact);
530198090Srdivacky  KEYWORD(inbounds);
531193323Sed  KEYWORD(align);
532193323Sed  KEYWORD(addrspace);
533193323Sed  KEYWORD(section);
534193323Sed  KEYWORD(alias);
535193323Sed  KEYWORD(module);
536193323Sed  KEYWORD(asm);
537193323Sed  KEYWORD(sideeffect);
538198396Srdivacky  KEYWORD(alignstack);
539245431Sdim  KEYWORD(inteldialect);
540193323Sed  KEYWORD(gc);
541263509Sdim  KEYWORD(prefix);
542193323Sed
543193323Sed  KEYWORD(ccc);
544193323Sed  KEYWORD(fastcc);
545193323Sed  KEYWORD(coldcc);
546193323Sed  KEYWORD(x86_stdcallcc);
547193323Sed  KEYWORD(x86_fastcallcc);
548208599Srdivacky  KEYWORD(x86_thiscallcc);
549194612Sed  KEYWORD(arm_apcscc);
550194612Sed  KEYWORD(arm_aapcscc);
551194612Sed  KEYWORD(arm_aapcs_vfpcc);
552200581Srdivacky  KEYWORD(msp430_intrcc);
553218893Sdim  KEYWORD(ptx_kernel);
554218893Sdim  KEYWORD(ptx_device);
555245431Sdim  KEYWORD(spir_kernel);
556245431Sdim  KEYWORD(spir_func);
557245431Sdim  KEYWORD(intel_ocl_bicc);
558256382Sdim  KEYWORD(x86_64_sysvcc);
559256382Sdim  KEYWORD(x86_64_win64cc);
560263509Sdim  KEYWORD(webkit_jscc);
561263509Sdim  KEYWORD(anyregcc);
562194612Sed
563193323Sed  KEYWORD(cc);
564193323Sed  KEYWORD(c);
565193323Sed
566252723Sdim  KEYWORD(attributes);
567252723Sdim
568252723Sdim  KEYWORD(alwaysinline);
569263509Sdim  KEYWORD(builtin);
570252723Sdim  KEYWORD(byval);
571263509Sdim  KEYWORD(cold);
572252723Sdim  KEYWORD(inlinehint);
573193323Sed  KEYWORD(inreg);
574252723Sdim  KEYWORD(minsize);
575252723Sdim  KEYWORD(naked);
576252723Sdim  KEYWORD(nest);
577193323Sed  KEYWORD(noalias);
578252723Sdim  KEYWORD(nobuiltin);
579193323Sed  KEYWORD(nocapture);
580252723Sdim  KEYWORD(noduplicate);
581252723Sdim  KEYWORD(noimplicitfloat);
582252723Sdim  KEYWORD(noinline);
583252723Sdim  KEYWORD(nonlazybind);
584252723Sdim  KEYWORD(noredzone);
585252723Sdim  KEYWORD(noreturn);
586252723Sdim  KEYWORD(nounwind);
587263509Sdim  KEYWORD(optnone);
588252723Sdim  KEYWORD(optsize);
589193323Sed  KEYWORD(readnone);
590193323Sed  KEYWORD(readonly);
591252723Sdim  KEYWORD(returned);
592226890Sdim  KEYWORD(returns_twice);
593252723Sdim  KEYWORD(signext);
594252723Sdim  KEYWORD(sret);
595193323Sed  KEYWORD(ssp);
596193323Sed  KEYWORD(sspreq);
597252723Sdim  KEYWORD(sspstrong);
598252723Sdim  KEYWORD(sanitize_address);
599252723Sdim  KEYWORD(sanitize_thread);
600252723Sdim  KEYWORD(sanitize_memory);
601252723Sdim  KEYWORD(uwtable);
602252723Sdim  KEYWORD(zeroext);
603193323Sed
604193323Sed  KEYWORD(type);
605193323Sed  KEYWORD(opaque);
606193323Sed
607193323Sed  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
608193323Sed  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
609193323Sed  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
610193323Sed  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
611193323Sed
612226890Sdim  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
613226890Sdim  KEYWORD(umin);
614226890Sdim
615193323Sed  KEYWORD(x);
616198892Srdivacky  KEYWORD(blockaddress);
617226890Sdim
618226890Sdim  KEYWORD(personality);
619226890Sdim  KEYWORD(cleanup);
620226890Sdim  KEYWORD(catch);
621226890Sdim  KEYWORD(filter);
622193323Sed#undef KEYWORD
623193323Sed
624193323Sed  // Keywords for types.
625193323Sed#define TYPEKEYWORD(STR, LLVMTY) \
626193323Sed  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
627193323Sed    TyVal = LLVMTY; return lltok::Type; }
628198090Srdivacky  TYPEKEYWORD("void",      Type::getVoidTy(Context));
629235633Sdim  TYPEKEYWORD("half",      Type::getHalfTy(Context));
630198090Srdivacky  TYPEKEYWORD("float",     Type::getFloatTy(Context));
631198090Srdivacky  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
632198090Srdivacky  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
633198090Srdivacky  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
634198090Srdivacky  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
635198090Srdivacky  TYPEKEYWORD("label",     Type::getLabelTy(Context));
636198090Srdivacky  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
637218893Sdim  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
638193323Sed#undef TYPEKEYWORD
639193323Sed
640193323Sed  // Keywords for instructions.
641193323Sed#define INSTKEYWORD(STR, Enum) \
642193323Sed  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
643193323Sed    UIntVal = Instruction::Enum; return lltok::kw_##STR; }
644193323Sed
645193574Sed  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
646193574Sed  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
647193574Sed  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
648193323Sed  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
649193323Sed  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
650193323Sed  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
651193323Sed  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
652193323Sed  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
653193323Sed
654193323Sed  INSTKEYWORD(phi,         PHI);
655193323Sed  INSTKEYWORD(call,        Call);
656193323Sed  INSTKEYWORD(trunc,       Trunc);
657193323Sed  INSTKEYWORD(zext,        ZExt);
658193323Sed  INSTKEYWORD(sext,        SExt);
659193323Sed  INSTKEYWORD(fptrunc,     FPTrunc);
660193323Sed  INSTKEYWORD(fpext,       FPExt);
661193323Sed  INSTKEYWORD(uitofp,      UIToFP);
662193323Sed  INSTKEYWORD(sitofp,      SIToFP);
663193323Sed  INSTKEYWORD(fptoui,      FPToUI);
664193323Sed  INSTKEYWORD(fptosi,      FPToSI);
665193323Sed  INSTKEYWORD(inttoptr,    IntToPtr);
666193323Sed  INSTKEYWORD(ptrtoint,    PtrToInt);
667193323Sed  INSTKEYWORD(bitcast,     BitCast);
668263509Sdim  INSTKEYWORD(addrspacecast, AddrSpaceCast);
669193323Sed  INSTKEYWORD(select,      Select);
670193323Sed  INSTKEYWORD(va_arg,      VAArg);
671193323Sed  INSTKEYWORD(ret,         Ret);
672193323Sed  INSTKEYWORD(br,          Br);
673193323Sed  INSTKEYWORD(switch,      Switch);
674198892Srdivacky  INSTKEYWORD(indirectbr,  IndirectBr);
675193323Sed  INSTKEYWORD(invoke,      Invoke);
676226890Sdim  INSTKEYWORD(resume,      Resume);
677193323Sed  INSTKEYWORD(unreachable, Unreachable);
678193323Sed
679193323Sed  INSTKEYWORD(alloca,      Alloca);
680193323Sed  INSTKEYWORD(load,        Load);
681193323Sed  INSTKEYWORD(store,       Store);
682226890Sdim  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
683226890Sdim  INSTKEYWORD(atomicrmw,   AtomicRMW);
684226890Sdim  INSTKEYWORD(fence,       Fence);
685193323Sed  INSTKEYWORD(getelementptr, GetElementPtr);
686193323Sed
687193323Sed  INSTKEYWORD(extractelement, ExtractElement);
688193323Sed  INSTKEYWORD(insertelement,  InsertElement);
689193323Sed  INSTKEYWORD(shufflevector,  ShuffleVector);
690193323Sed  INSTKEYWORD(extractvalue,   ExtractValue);
691193323Sed  INSTKEYWORD(insertvalue,    InsertValue);
692226890Sdim  INSTKEYWORD(landingpad,     LandingPad);
693193323Sed#undef INSTKEYWORD
694193323Sed
695193323Sed  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
696193323Sed  // the CFE to avoid forcing it to deal with 64-bit numbers.
697193323Sed  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
698252723Sdim      TokStart[1] == '0' && TokStart[2] == 'x' &&
699252723Sdim      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
700193323Sed    int len = CurPtr-TokStart-3;
701193323Sed    uint32_t bits = len * 4;
702198090Srdivacky    APInt Tmp(bits, StringRef(TokStart+3, len), 16);
703193323Sed    uint32_t activeBits = Tmp.getActiveBits();
704193323Sed    if (activeBits > 0 && activeBits < bits)
705218893Sdim      Tmp = Tmp.trunc(activeBits);
706193323Sed    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
707193323Sed    return lltok::APSInt;
708193323Sed  }
709193323Sed
710193323Sed  // If this is "cc1234", return this as just "cc".
711193323Sed  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
712193323Sed    CurPtr = TokStart+2;
713193323Sed    return lltok::kw_cc;
714193323Sed  }
715193323Sed
716193323Sed  // Finally, if this isn't known, return an error.
717193323Sed  CurPtr = TokStart+1;
718193323Sed  return lltok::Error;
719193323Sed}
720193323Sed
721193323Sed
722193323Sed/// Lex0x: Handle productions that start with 0x, knowing that it matches and
723193323Sed/// that this is not a label:
724193323Sed///    HexFPConstant     0x[0-9A-Fa-f]+
725193323Sed///    HexFP80Constant   0xK[0-9A-Fa-f]+
726193323Sed///    HexFP128Constant  0xL[0-9A-Fa-f]+
727193323Sed///    HexPPC128Constant 0xM[0-9A-Fa-f]+
728245431Sdim///    HexHalfConstant   0xH[0-9A-Fa-f]+
729193323Sedlltok::Kind LLLexer::Lex0x() {
730193323Sed  CurPtr = TokStart + 2;
731193323Sed
732193323Sed  char Kind;
733245431Sdim  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
734193323Sed    Kind = *CurPtr++;
735193323Sed  } else {
736193323Sed    Kind = 'J';
737193323Sed  }
738193323Sed
739252723Sdim  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
740193323Sed    // Bad token, return it as an error.
741193323Sed    CurPtr = TokStart+1;
742193323Sed    return lltok::Error;
743193323Sed  }
744193323Sed
745252723Sdim  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
746193323Sed    ++CurPtr;
747193323Sed
748193323Sed  if (Kind == 'J') {
749193323Sed    // HexFPConstant - Floating point constant represented in IEEE format as a
750193323Sed    // hexadecimal number for when exponential notation is not precise enough.
751235633Sdim    // Half, Float, and double only.
752193323Sed    APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
753193323Sed    return lltok::APFloat;
754193323Sed  }
755193323Sed
756193323Sed  uint64_t Pair[2];
757193323Sed  switch (Kind) {
758198090Srdivacky  default: llvm_unreachable("Unknown kind!");
759193323Sed  case 'K':
760193323Sed    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
761193323Sed    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
762252723Sdim    APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
763193323Sed    return lltok::APFloat;
764193323Sed  case 'L':
765193323Sed    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
766193323Sed    HexToIntPair(TokStart+3, CurPtr, Pair);
767252723Sdim    APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
768193323Sed    return lltok::APFloat;
769193323Sed  case 'M':
770193323Sed    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
771193323Sed    HexToIntPair(TokStart+3, CurPtr, Pair);
772252723Sdim    APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
773193323Sed    return lltok::APFloat;
774245431Sdim  case 'H':
775252723Sdim    APFloatVal = APFloat(APFloat::IEEEhalf,
776252723Sdim                         APInt(16,HexIntToVal(TokStart+3, CurPtr)));
777245431Sdim    return lltok::APFloat;
778193323Sed  }
779193323Sed}
780193323Sed
781193323Sed/// LexIdentifier: Handle several related productions:
782193323Sed///    Label             [-a-zA-Z$._0-9]+:
783193323Sed///    NInteger          -[0-9]+
784193323Sed///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
785193323Sed///    PInteger          [0-9]+
786193323Sed///    HexFPConstant     0x[0-9A-Fa-f]+
787193323Sed///    HexFP80Constant   0xK[0-9A-Fa-f]+
788193323Sed///    HexFP128Constant  0xL[0-9A-Fa-f]+
789193323Sed///    HexPPC128Constant 0xM[0-9A-Fa-f]+
790193323Sedlltok::Kind LLLexer::LexDigitOrNegative() {
791252723Sdim  // If the letter after the negative is not a number, this is probably a label.
792252723Sdim  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
793252723Sdim      !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
794193323Sed    // Okay, this is not a number after the -, it's probably a label.
795193323Sed    if (const char *End = isLabelTail(CurPtr)) {
796193323Sed      StrVal.assign(TokStart, End-1);
797193323Sed      CurPtr = End;
798193323Sed      return lltok::LabelStr;
799193323Sed    }
800193323Sed
801193323Sed    return lltok::Error;
802193323Sed  }
803193323Sed
804193323Sed  // At this point, it is either a label, int or fp constant.
805193323Sed
806193323Sed  // Skip digits, we have at least one.
807252723Sdim  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
808193323Sed    /*empty*/;
809193323Sed
810193323Sed  // Check to see if this really is a label afterall, e.g. "-1:".
811193323Sed  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
812193323Sed    if (const char *End = isLabelTail(CurPtr)) {
813193323Sed      StrVal.assign(TokStart, End-1);
814193323Sed      CurPtr = End;
815193323Sed      return lltok::LabelStr;
816193323Sed    }
817193323Sed  }
818193323Sed
819193323Sed  // If the next character is a '.', then it is a fp value, otherwise its
820193323Sed  // integer.
821193323Sed  if (CurPtr[0] != '.') {
822193323Sed    if (TokStart[0] == '0' && TokStart[1] == 'x')
823193323Sed      return Lex0x();
824193323Sed    unsigned Len = CurPtr-TokStart;
825193323Sed    uint32_t numBits = ((Len * 64) / 19) + 2;
826198090Srdivacky    APInt Tmp(numBits, StringRef(TokStart, Len), 10);
827193323Sed    if (TokStart[0] == '-') {
828193323Sed      uint32_t minBits = Tmp.getMinSignedBits();
829193323Sed      if (minBits > 0 && minBits < numBits)
830218893Sdim        Tmp = Tmp.trunc(minBits);
831193323Sed      APSIntVal = APSInt(Tmp, false);
832193323Sed    } else {
833193323Sed      uint32_t activeBits = Tmp.getActiveBits();
834193323Sed      if (activeBits > 0 && activeBits < numBits)
835218893Sdim        Tmp = Tmp.trunc(activeBits);
836193323Sed      APSIntVal = APSInt(Tmp, true);
837193323Sed    }
838193323Sed    return lltok::APSInt;
839193323Sed  }
840193323Sed
841193323Sed  ++CurPtr;
842193323Sed
843193323Sed  // Skip over [0-9]*([eE][-+]?[0-9]+)?
844252723Sdim  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
845193323Sed
846193323Sed  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
847252723Sdim    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
848252723Sdim        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
849252723Sdim          isdigit(static_cast<unsigned char>(CurPtr[2])))) {
850193323Sed      CurPtr += 2;
851252723Sdim      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
852193323Sed    }
853193323Sed  }
854193323Sed
855218893Sdim  APFloatVal = APFloat(std::atof(TokStart));
856193323Sed  return lltok::APFloat;
857193323Sed}
858193323Sed
859193323Sed///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
860193323Sedlltok::Kind LLLexer::LexPositive() {
861193323Sed  // If the letter after the negative is a number, this is probably not a
862193323Sed  // label.
863252723Sdim  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
864193323Sed    return lltok::Error;
865193323Sed
866193323Sed  // Skip digits.
867252723Sdim  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
868193323Sed    /*empty*/;
869193323Sed
870193323Sed  // At this point, we need a '.'.
871193323Sed  if (CurPtr[0] != '.') {
872193323Sed    CurPtr = TokStart+1;
873193323Sed    return lltok::Error;
874193323Sed  }
875193323Sed
876193323Sed  ++CurPtr;
877193323Sed
878193323Sed  // Skip over [0-9]*([eE][-+]?[0-9]+)?
879252723Sdim  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
880193323Sed
881193323Sed  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
882252723Sdim    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
883252723Sdim        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
884252723Sdim        isdigit(static_cast<unsigned char>(CurPtr[2])))) {
885193323Sed      CurPtr += 2;
886252723Sdim      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
887193323Sed    }
888193323Sed  }
889193323Sed
890218893Sdim  APFloatVal = APFloat(std::atof(TokStart));
891193323Sed  return lltok::APFloat;
892193323Sed}
893