1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implement the Lexer for .ll files.
10//
11//===----------------------------------------------------------------------===//
12
13#include "LLLexer.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/StringExtras.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/IR/DerivedTypes.h"
19#include "llvm/IR/Instruction.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/SourceMgr.h"
22#include <cassert>
23#include <cctype>
24#include <cstdio>
25
26using namespace llvm;
27
/// Build an error diagnostic for \p Msg at \p ErrorLoc through the source
/// manager and store it in ErrorInfo. Nothing is printed here.
///
/// \returns true, unconditionally.
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
  return true;
}
32
/// Print a warning diagnostic for \p Msg at \p WarningLoc through the source
/// manager. Unlike Error(), nothing is stored in ErrorInfo.
void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
36
37//===----------------------------------------------------------------------===//
38// Helper functions.
39//===----------------------------------------------------------------------===//
40
41// atoull - Convert an ascii string of decimal digits into the unsigned long
42// long representation... this does not have to do input error checking,
43// because we know that the input will be matched by a suitable regex...
44//
45uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
46  uint64_t Result = 0;
47  for (; Buffer != End; Buffer++) {
48    uint64_t OldRes = Result;
49    Result *= 10;
50    Result += *Buffer-'0';
51    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
52      Error("constant bigger than 64 bits detected!");
53      return 0;
54    }
55  }
56  return Result;
57}
58
59uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
60  uint64_t Result = 0;
61  for (; Buffer != End; ++Buffer) {
62    uint64_t OldRes = Result;
63    Result *= 16;
64    Result += hexDigitValue(*Buffer);
65
66    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
67      Error("constant bigger than 64 bits detected!");
68      return 0;
69    }
70  }
71  return Result;
72}
73
74void LLLexer::HexToIntPair(const char *Buffer, const char *End,
75                           uint64_t Pair[2]) {
76  Pair[0] = 0;
77  if (End - Buffer >= 16) {
78    for (int i = 0; i < 16; i++, Buffer++) {
79      assert(Buffer != End);
80      Pair[0] *= 16;
81      Pair[0] += hexDigitValue(*Buffer);
82    }
83  }
84  Pair[1] = 0;
85  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
86    Pair[1] *= 16;
87    Pair[1] += hexDigitValue(*Buffer);
88  }
89  if (Buffer != End)
90    Error("constant bigger than 128 bits detected!");
91}
92
93/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
94/// { low64, high16 } as usual for an APInt.
95void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
96                           uint64_t Pair[2]) {
97  Pair[1] = 0;
98  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
99    assert(Buffer != End);
100    Pair[1] *= 16;
101    Pair[1] += hexDigitValue(*Buffer);
102  }
103  Pair[0] = 0;
104  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
105    Pair[0] *= 16;
106    Pair[0] += hexDigitValue(*Buffer);
107  }
108  if (Buffer != End)
109    Error("constant bigger than 128 bits detected!");
110}
111
112// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
113// appropriate character.
114static void UnEscapeLexed(std::string &Str) {
115  if (Str.empty()) return;
116
117  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
118  char *BOut = Buffer;
119  for (char *BIn = Buffer; BIn != EndBuffer; ) {
120    if (BIn[0] == '\\') {
121      if (BIn < EndBuffer-1 && BIn[1] == '\\') {
122        *BOut++ = '\\'; // Two \ becomes one
123        BIn += 2;
124      } else if (BIn < EndBuffer-2 &&
125                 isxdigit(static_cast<unsigned char>(BIn[1])) &&
126                 isxdigit(static_cast<unsigned char>(BIn[2]))) {
127        *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
128        BIn += 3;                           // Skip over handled chars
129        ++BOut;
130      } else {
131        *BOut++ = *BIn++;
132      }
133    } else {
134      *BOut++ = *BIn++;
135    }
136  }
137  Str.resize(BOut-Buffer);
138}
139
/// isLabelChar - Return true if \p C is one of [-a-zA-Z$._0-9], i.e. an
/// alphanumeric character or one of '-', '$', '.', '_'.
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
145
146/// isLabelTail - Return true if this pointer points to a valid end of a label.
147static const char *isLabelTail(const char *CurPtr) {
148  while (true) {
149    if (CurPtr[0] == ':') return CurPtr+1;
150    if (!isLabelChar(CurPtr[0])) return nullptr;
151    ++CurPtr;
152  }
153}
154
155//===----------------------------------------------------------------------===//
156// Lexer definition.
157//===----------------------------------------------------------------------===//
158
/// Construct a lexer over \p StartBuf.
///
/// \param StartBuf The text to lex; lexing starts at its first character.
/// \param SM       Source manager used to build and print diagnostics.
/// \param Err      Receives the diagnostic built by Error().
/// \param C        Context used when creating types for type tokens.
LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
                 LLVMContext &C)
    : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C), APFloatVal(0.0),
      IgnoreColonInIdentifiers(false) {
  // Start reading at the beginning of the buffer.
  CurPtr = CurBuf.begin();
}
165
166int LLLexer::getNextChar() {
167  char CurChar = *CurPtr++;
168  switch (CurChar) {
169  default: return (unsigned char)CurChar;
170  case 0:
171    // A nul character in the stream is either the end of the current buffer or
172    // a random nul in the file.  Disambiguate that here.
173    if (CurPtr-1 != CurBuf.end())
174      return 0;  // Just whitespace.
175
176    // Otherwise, return end of file.
177    --CurPtr;  // Another call to lex will return EOF again.
178    return EOF;
179  }
180}
181
182lltok::Kind LLLexer::LexToken() {
183  while (true) {
184    TokStart = CurPtr;
185
186    int CurChar = getNextChar();
187    switch (CurChar) {
188    default:
189      // Handle letters: [a-zA-Z_]
190      if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
191        return LexIdentifier();
192
193      return lltok::Error;
194    case EOF: return lltok::Eof;
195    case 0:
196    case ' ':
197    case '\t':
198    case '\n':
199    case '\r':
200      // Ignore whitespace.
201      continue;
202    case '+': return LexPositive();
203    case '@': return LexAt();
204    case '$': return LexDollar();
205    case '%': return LexPercent();
206    case '"': return LexQuote();
207    case '.':
208      if (const char *Ptr = isLabelTail(CurPtr)) {
209        CurPtr = Ptr;
210        StrVal.assign(TokStart, CurPtr-1);
211        return lltok::LabelStr;
212      }
213      if (CurPtr[0] == '.' && CurPtr[1] == '.') {
214        CurPtr += 2;
215        return lltok::dotdotdot;
216      }
217      return lltok::Error;
218    case ';':
219      SkipLineComment();
220      continue;
221    case '!': return LexExclaim();
222    case '^':
223      return LexCaret();
224    case ':':
225      return lltok::colon;
226    case '#': return LexHash();
227    case '0': case '1': case '2': case '3': case '4':
228    case '5': case '6': case '7': case '8': case '9':
229    case '-':
230      return LexDigitOrNegative();
231    case '=': return lltok::equal;
232    case '[': return lltok::lsquare;
233    case ']': return lltok::rsquare;
234    case '{': return lltok::lbrace;
235    case '}': return lltok::rbrace;
236    case '<': return lltok::less;
237    case '>': return lltok::greater;
238    case '(': return lltok::lparen;
239    case ')': return lltok::rparen;
240    case ',': return lltok::comma;
241    case '*': return lltok::star;
242    case '|': return lltok::bar;
243    }
244  }
245}
246
247void LLLexer::SkipLineComment() {
248  while (true) {
249    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
250      return;
251  }
252}
253
/// Lex all tokens that start with an @ character.
///   GlobalVar   @\"[^\"]*\"
///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
///   GlobalVarID @[0-9]+
///
/// Delegates to LexVar, which returns lltok::GlobalVar for the named forms and
/// lltok::GlobalID for the numeric form.
lltok::Kind LLLexer::LexAt() {
  return LexVar(lltok::GlobalVar, lltok::GlobalID);
}
261
262lltok::Kind LLLexer::LexDollar() {
263  if (const char *Ptr = isLabelTail(TokStart)) {
264    CurPtr = Ptr;
265    StrVal.assign(TokStart, CurPtr - 1);
266    return lltok::LabelStr;
267  }
268
269  // Handle DollarStringConstant: $\"[^\"]*\"
270  if (CurPtr[0] == '"') {
271    ++CurPtr;
272
273    while (true) {
274      int CurChar = getNextChar();
275
276      if (CurChar == EOF) {
277        Error("end of file in COMDAT variable name");
278        return lltok::Error;
279      }
280      if (CurChar == '"') {
281        StrVal.assign(TokStart + 2, CurPtr - 1);
282        UnEscapeLexed(StrVal);
283        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
284          Error("Null bytes are not allowed in names");
285          return lltok::Error;
286        }
287        return lltok::ComdatVar;
288      }
289    }
290  }
291
292  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
293  if (ReadVarName())
294    return lltok::ComdatVar;
295
296  return lltok::Error;
297}
298
299/// ReadString - Read a string until the closing quote.
300lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
301  const char *Start = CurPtr;
302  while (true) {
303    int CurChar = getNextChar();
304
305    if (CurChar == EOF) {
306      Error("end of file in string constant");
307      return lltok::Error;
308    }
309    if (CurChar == '"') {
310      StrVal.assign(Start, CurPtr-1);
311      UnEscapeLexed(StrVal);
312      return kind;
313    }
314  }
315}
316
317/// ReadVarName - Read the rest of a token containing a variable name.
318bool LLLexer::ReadVarName() {
319  const char *NameStart = CurPtr;
320  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
321      CurPtr[0] == '-' || CurPtr[0] == '$' ||
322      CurPtr[0] == '.' || CurPtr[0] == '_') {
323    ++CurPtr;
324    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
325           CurPtr[0] == '-' || CurPtr[0] == '$' ||
326           CurPtr[0] == '.' || CurPtr[0] == '_')
327      ++CurPtr;
328
329    StrVal.assign(NameStart, CurPtr);
330    return true;
331  }
332  return false;
333}
334
335// Lex an ID: [0-9]+. On success, the ID is stored in UIntVal and Token is
336// returned, otherwise the Error token is returned.
337lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
338  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
339    return lltok::Error;
340
341  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
342    /*empty*/;
343
344  uint64_t Val = atoull(TokStart + 1, CurPtr);
345  if ((unsigned)Val != Val)
346    Error("invalid value number (too large)!");
347  UIntVal = unsigned(Val);
348  return Token;
349}
350
351lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
352  // Handle StringConstant: \"[^\"]*\"
353  if (CurPtr[0] == '"') {
354    ++CurPtr;
355
356    while (true) {
357      int CurChar = getNextChar();
358
359      if (CurChar == EOF) {
360        Error("end of file in global variable name");
361        return lltok::Error;
362      }
363      if (CurChar == '"') {
364        StrVal.assign(TokStart+2, CurPtr-1);
365        UnEscapeLexed(StrVal);
366        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
367          Error("Null bytes are not allowed in names");
368          return lltok::Error;
369        }
370        return Var;
371      }
372    }
373  }
374
375  // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
376  if (ReadVarName())
377    return Var;
378
379  // Handle VarID: [0-9]+
380  return LexUIntID(VarID);
381}
382
/// Lex all tokens that start with a % character.
///   LocalVar   ::= %\"[^\"]*\"
///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
///   LocalVarID ::= %[0-9]+
///
/// Delegates to LexVar, which returns lltok::LocalVar for the named forms and
/// lltok::LocalVarID for the numeric form.
lltok::Kind LLLexer::LexPercent() {
  return LexVar(lltok::LocalVar, lltok::LocalVarID);
}
390
391/// Lex all tokens that start with a " character.
392///   QuoteLabel        "[^"]+":
393///   StringConstant    "[^"]*"
394lltok::Kind LLLexer::LexQuote() {
395  lltok::Kind kind = ReadString(lltok::StringConstant);
396  if (kind == lltok::Error || kind == lltok::Eof)
397    return kind;
398
399  if (CurPtr[0] == ':') {
400    ++CurPtr;
401    if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
402      Error("Null bytes are not allowed in names");
403      kind = lltok::Error;
404    } else {
405      kind = lltok::LabelStr;
406    }
407  }
408
409  return kind;
410}
411
412/// Lex all tokens that start with a ! character.
413///    !foo
414///    !
415lltok::Kind LLLexer::LexExclaim() {
416  // Lex a metadata name as a MetadataVar.
417  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
418      CurPtr[0] == '-' || CurPtr[0] == '$' ||
419      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
420    ++CurPtr;
421    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
422           CurPtr[0] == '-' || CurPtr[0] == '$' ||
423           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
424      ++CurPtr;
425
426    StrVal.assign(TokStart+1, CurPtr);   // Skip !
427    UnEscapeLexed(StrVal);
428    return lltok::MetadataVar;
429  }
430  return lltok::exclaim;
431}
432
/// Lex all tokens that start with a ^ character.
///    SummaryID ::= ^[0-9]+
///
/// Delegates to LexUIntID, which stores the number in UIntVal on success and
/// returns the Error token when no digit follows.
lltok::Kind LLLexer::LexCaret() {
  // Handle SummaryID: ^[0-9]+
  return LexUIntID(lltok::SummaryID);
}
439
/// Lex all tokens that start with a # character.
///    AttrGrpID ::= #[0-9]+
///
/// Delegates to LexUIntID, which stores the number in UIntVal on success and
/// returns the Error token when no digit follows.
lltok::Kind LLLexer::LexHash() {
  // Handle AttrGrpID: #[0-9]+
  return LexUIntID(lltok::AttrGrpID);
}
446
/// Lex a label, integer type, keyword, or hexadecimal integer constant.
///    Label           [-a-zA-Z$._0-9]+:
///    IntegerType     i[0-9]+
///    Keyword         sdiv, float, ...
///    HexIntConstant  [us]0x[0-9A-Fa-f]+
///
/// On entry the first character of the token has already been consumed, so
/// CurPtr points at the second character and CurPtr[-1] is the first.
/// The candidates are tried in this order: label, integer type, plain keyword,
/// type keyword, instruction keyword, debug-info names (DW_*, DIFlag*,
/// DISPFlag*, CSK_*, emission kinds, name table kinds), [us]0x hex constant,
/// "cc<digits>", and finally lltok::Error.
lltok::Kind LLLexer::LexIdentifier() {
  const char *StartChar = CurPtr;
  // IntEnd stays null while the token can still be an integer type (it starts
  // with 'i' and only digits have followed). Otherwise it is pinned to
  // StartChar, which means "no digits, not an integer type".
  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
  // First character that cannot be part of a keyword ([a-zA-Z0-9_]), if any.
  const char *KeywordEnd = nullptr;

  for (; isLabelChar(*CurPtr); ++CurPtr) {
    // If we decide this is an integer, remember the end of the sequence.
    if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
      IntEnd = CurPtr;
    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
        *CurPtr != '_')
      KeywordEnd = CurPtr;
  }

  // If we stopped due to a colon, unless we were directed to ignore it,
  // this really is a label.
  if (!IgnoreColonInIdentifiers && *CurPtr == ':') {
    // The label text includes the first character (StartChar-1) and excludes
    // the colon, which is consumed by the post-increment.
    StrVal.assign(StartChar-1, CurPtr++);
    return lltok::LabelStr;
  }

  // Otherwise, this wasn't a label.  If this was valid as an integer type,
  // return it.
  if (!IntEnd) IntEnd = CurPtr;
  if (IntEnd != StartChar) {
    // Only the 'i' and its digits form the token; anything after the digits
    // is left for the next token.
    CurPtr = IntEnd;
    uint64_t NumBits = atoull(StartChar, CurPtr);
    if (NumBits < IntegerType::MIN_INT_BITS ||
        NumBits > IntegerType::MAX_INT_BITS) {
      Error("bitwidth for integer type out of range!");
      return lltok::Error;
    }
    TyVal = IntegerType::get(Context, NumBits);
    return lltok::Type;
  }

  // Otherwise, this was a letter sequence.  See which keyword this is.
  if (!KeywordEnd) KeywordEnd = CurPtr;
  CurPtr = KeywordEnd;
  // Step back so the keyword includes the first character of the token.
  --StartChar;
  StringRef Keyword(StartChar, CurPtr - StartChar);

  // KEYWORD(foo) returns lltok::kw_foo when the token text is exactly "foo".
#define KEYWORD(STR)                                                           \
  do {                                                                         \
    if (Keyword == #STR)                                                       \
      return lltok::kw_##STR;                                                  \
  } while (false)

  KEYWORD(true);    KEYWORD(false);
  KEYWORD(declare); KEYWORD(define);
  KEYWORD(global);  KEYWORD(constant);

  KEYWORD(dso_local);
  KEYWORD(dso_preemptable);

  KEYWORD(private);
  KEYWORD(internal);
  KEYWORD(available_externally);
  KEYWORD(linkonce);
  KEYWORD(linkonce_odr);
  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
  KEYWORD(weak_odr);
  KEYWORD(appending);
  KEYWORD(dllimport);
  KEYWORD(dllexport);
  KEYWORD(common);
  KEYWORD(default);
  KEYWORD(hidden);
  KEYWORD(protected);
  KEYWORD(unnamed_addr);
  KEYWORD(local_unnamed_addr);
  KEYWORD(externally_initialized);
  KEYWORD(extern_weak);
  KEYWORD(external);
  KEYWORD(thread_local);
  KEYWORD(localdynamic);
  KEYWORD(initialexec);
  KEYWORD(localexec);
  KEYWORD(zeroinitializer);
  KEYWORD(undef);
  KEYWORD(null);
  KEYWORD(none);
  KEYWORD(to);
  KEYWORD(caller);
  KEYWORD(within);
  KEYWORD(from);
  KEYWORD(tail);
  KEYWORD(musttail);
  KEYWORD(notail);
  KEYWORD(target);
  KEYWORD(triple);
  KEYWORD(source_filename);
  KEYWORD(unwind);
  KEYWORD(deplibs);             // FIXME: Remove in 4.0.
  KEYWORD(datalayout);
  KEYWORD(volatile);
  KEYWORD(atomic);
  KEYWORD(unordered);
  KEYWORD(monotonic);
  KEYWORD(acquire);
  KEYWORD(release);
  KEYWORD(acq_rel);
  KEYWORD(seq_cst);
  KEYWORD(syncscope);

  KEYWORD(nnan);
  KEYWORD(ninf);
  KEYWORD(nsz);
  KEYWORD(arcp);
  KEYWORD(contract);
  KEYWORD(reassoc);
  KEYWORD(afn);
  KEYWORD(fast);
  KEYWORD(nuw);
  KEYWORD(nsw);
  KEYWORD(exact);
  KEYWORD(inbounds);
  KEYWORD(inrange);
  KEYWORD(align);
  KEYWORD(addrspace);
  KEYWORD(section);
  KEYWORD(partition);
  KEYWORD(alias);
  KEYWORD(ifunc);
  KEYWORD(module);
  KEYWORD(asm);
  KEYWORD(sideeffect);
  KEYWORD(alignstack);
  KEYWORD(inteldialect);
  KEYWORD(gc);
  KEYWORD(prefix);
  KEYWORD(prologue);

  KEYWORD(ccc);
  KEYWORD(fastcc);
  KEYWORD(coldcc);
  KEYWORD(cfguard_checkcc);
  KEYWORD(x86_stdcallcc);
  KEYWORD(x86_fastcallcc);
  KEYWORD(x86_thiscallcc);
  KEYWORD(x86_vectorcallcc);
  KEYWORD(arm_apcscc);
  KEYWORD(arm_aapcscc);
  KEYWORD(arm_aapcs_vfpcc);
  KEYWORD(aarch64_vector_pcs);
  KEYWORD(aarch64_sve_vector_pcs);
  KEYWORD(msp430_intrcc);
  KEYWORD(avr_intrcc);
  KEYWORD(avr_signalcc);
  KEYWORD(ptx_kernel);
  KEYWORD(ptx_device);
  KEYWORD(spir_kernel);
  KEYWORD(spir_func);
  KEYWORD(intel_ocl_bicc);
  KEYWORD(x86_64_sysvcc);
  KEYWORD(win64cc);
  KEYWORD(x86_regcallcc);
  KEYWORD(webkit_jscc);
  KEYWORD(swiftcc);
  KEYWORD(anyregcc);
  KEYWORD(preserve_mostcc);
  KEYWORD(preserve_allcc);
  KEYWORD(ghccc);
  KEYWORD(x86_intrcc);
  KEYWORD(hhvmcc);
  KEYWORD(hhvm_ccc);
  KEYWORD(cxx_fast_tlscc);
  KEYWORD(amdgpu_vs);
  KEYWORD(amdgpu_ls);
  KEYWORD(amdgpu_hs);
  KEYWORD(amdgpu_es);
  KEYWORD(amdgpu_gs);
  KEYWORD(amdgpu_ps);
  KEYWORD(amdgpu_cs);
  KEYWORD(amdgpu_kernel);
  KEYWORD(tailcc);

  KEYWORD(cc);
  KEYWORD(c);

  KEYWORD(attributes);

  KEYWORD(alwaysinline);
  KEYWORD(allocsize);
  KEYWORD(argmemonly);
  KEYWORD(builtin);
  KEYWORD(byval);
  KEYWORD(inalloca);
  KEYWORD(cold);
  KEYWORD(convergent);
  KEYWORD(dereferenceable);
  KEYWORD(dereferenceable_or_null);
  KEYWORD(inaccessiblememonly);
  KEYWORD(inaccessiblemem_or_argmemonly);
  KEYWORD(inlinehint);
  KEYWORD(inreg);
  KEYWORD(jumptable);
  KEYWORD(minsize);
  KEYWORD(naked);
  KEYWORD(nest);
  KEYWORD(noalias);
  KEYWORD(nobuiltin);
  KEYWORD(nocapture);
  KEYWORD(noduplicate);
  KEYWORD(nofree);
  KEYWORD(noimplicitfloat);
  KEYWORD(noinline);
  KEYWORD(norecurse);
  KEYWORD(nonlazybind);
  KEYWORD(nonnull);
  KEYWORD(noredzone);
  KEYWORD(noreturn);
  KEYWORD(nosync);
  KEYWORD(nocf_check);
  KEYWORD(nounwind);
  KEYWORD(optforfuzzing);
  KEYWORD(optnone);
  KEYWORD(optsize);
  KEYWORD(readnone);
  KEYWORD(readonly);
  KEYWORD(returned);
  KEYWORD(returns_twice);
  KEYWORD(signext);
  KEYWORD(speculatable);
  KEYWORD(sret);
  KEYWORD(ssp);
  KEYWORD(sspreq);
  KEYWORD(sspstrong);
  KEYWORD(strictfp);
  KEYWORD(safestack);
  KEYWORD(shadowcallstack);
  KEYWORD(sanitize_address);
  KEYWORD(sanitize_hwaddress);
  KEYWORD(sanitize_memtag);
  KEYWORD(sanitize_thread);
  KEYWORD(sanitize_memory);
  KEYWORD(speculative_load_hardening);
  KEYWORD(swifterror);
  KEYWORD(swiftself);
  KEYWORD(uwtable);
  KEYWORD(willreturn);
  KEYWORD(writeonly);
  KEYWORD(zeroext);
  KEYWORD(immarg);

  KEYWORD(type);
  KEYWORD(opaque);

  KEYWORD(comdat);

  // Comdat types
  KEYWORD(any);
  KEYWORD(exactmatch);
  KEYWORD(largest);
  KEYWORD(noduplicates);
  KEYWORD(samesize);

  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);

  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
  KEYWORD(umin);

  KEYWORD(vscale);
  KEYWORD(x);
  KEYWORD(blockaddress);

  // Metadata types.
  KEYWORD(distinct);

  // Use-list order directives.
  KEYWORD(uselistorder);
  KEYWORD(uselistorder_bb);

  KEYWORD(personality);
  KEYWORD(cleanup);
  KEYWORD(catch);
  KEYWORD(filter);

  // Summary index keywords.
  KEYWORD(path);
  KEYWORD(hash);
  KEYWORD(gv);
  KEYWORD(guid);
  KEYWORD(name);
  KEYWORD(summaries);
  KEYWORD(flags);
  KEYWORD(linkage);
  KEYWORD(notEligibleToImport);
  KEYWORD(live);
  KEYWORD(dsoLocal);
  KEYWORD(canAutoHide);
  KEYWORD(function);
  KEYWORD(insts);
  KEYWORD(funcFlags);
  KEYWORD(readNone);
  KEYWORD(readOnly);
  KEYWORD(noRecurse);
  KEYWORD(returnDoesNotAlias);
  KEYWORD(noInline);
  KEYWORD(alwaysInline);
  KEYWORD(calls);
  KEYWORD(callee);
  KEYWORD(hotness);
  KEYWORD(unknown);
  KEYWORD(hot);
  KEYWORD(critical);
  KEYWORD(relbf);
  KEYWORD(variable);
  KEYWORD(vTableFuncs);
  KEYWORD(virtFunc);
  KEYWORD(aliasee);
  KEYWORD(refs);
  KEYWORD(typeIdInfo);
  KEYWORD(typeTests);
  KEYWORD(typeTestAssumeVCalls);
  KEYWORD(typeCheckedLoadVCalls);
  KEYWORD(typeTestAssumeConstVCalls);
  KEYWORD(typeCheckedLoadConstVCalls);
  KEYWORD(vFuncId);
  KEYWORD(offset);
  KEYWORD(args);
  KEYWORD(typeid);
  KEYWORD(typeidCompatibleVTable);
  KEYWORD(summary);
  KEYWORD(typeTestRes);
  KEYWORD(kind);
  KEYWORD(unsat);
  KEYWORD(byteArray);
  KEYWORD(inline);
  KEYWORD(single);
  KEYWORD(allOnes);
  KEYWORD(sizeM1BitWidth);
  KEYWORD(alignLog2);
  KEYWORD(sizeM1);
  KEYWORD(bitMask);
  KEYWORD(inlineBits);
  KEYWORD(wpdResolutions);
  KEYWORD(wpdRes);
  KEYWORD(indir);
  KEYWORD(singleImpl);
  KEYWORD(branchFunnel);
  KEYWORD(singleImplName);
  KEYWORD(resByArg);
  KEYWORD(byArg);
  KEYWORD(uniformRetVal);
  KEYWORD(uniqueRetVal);
  KEYWORD(virtualConstProp);
  KEYWORD(info);
  KEYWORD(byte);
  KEYWORD(bit);
  KEYWORD(varFlags);

#undef KEYWORD

  // Keywords for types.
  // TYPEKEYWORD stores the matching type in TyVal and returns lltok::Type.
#define TYPEKEYWORD(STR, LLVMTY)                                               \
  do {                                                                         \
    if (Keyword == STR) {                                                      \
      TyVal = LLVMTY;                                                          \
      return lltok::Type;                                                      \
    }                                                                          \
  } while (false)

  TYPEKEYWORD("void",      Type::getVoidTy(Context));
  TYPEKEYWORD("half",      Type::getHalfTy(Context));
  TYPEKEYWORD("float",     Type::getFloatTy(Context));
  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
  TYPEKEYWORD("label",     Type::getLabelTy(Context));
  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
  TYPEKEYWORD("token",     Type::getTokenTy(Context));

#undef TYPEKEYWORD

  // Keywords for instructions.
  // INSTKEYWORD stores the instruction opcode in UIntVal and returns the
  // matching lltok::kw_* token.
#define INSTKEYWORD(STR, Enum)                                                 \
  do {                                                                         \
    if (Keyword == #STR) {                                                     \
      UIntVal = Instruction::Enum;                                             \
      return lltok::kw_##STR;                                                  \
    }                                                                          \
  } while (false)

  INSTKEYWORD(fneg,  FNeg);

  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);

  INSTKEYWORD(phi,         PHI);
  INSTKEYWORD(call,        Call);
  INSTKEYWORD(trunc,       Trunc);
  INSTKEYWORD(zext,        ZExt);
  INSTKEYWORD(sext,        SExt);
  INSTKEYWORD(fptrunc,     FPTrunc);
  INSTKEYWORD(fpext,       FPExt);
  INSTKEYWORD(uitofp,      UIToFP);
  INSTKEYWORD(sitofp,      SIToFP);
  INSTKEYWORD(fptoui,      FPToUI);
  INSTKEYWORD(fptosi,      FPToSI);
  INSTKEYWORD(inttoptr,    IntToPtr);
  INSTKEYWORD(ptrtoint,    PtrToInt);
  INSTKEYWORD(bitcast,     BitCast);
  INSTKEYWORD(addrspacecast, AddrSpaceCast);
  INSTKEYWORD(select,      Select);
  INSTKEYWORD(va_arg,      VAArg);
  INSTKEYWORD(ret,         Ret);
  INSTKEYWORD(br,          Br);
  INSTKEYWORD(switch,      Switch);
  INSTKEYWORD(indirectbr,  IndirectBr);
  INSTKEYWORD(invoke,      Invoke);
  INSTKEYWORD(resume,      Resume);
  INSTKEYWORD(unreachable, Unreachable);
  INSTKEYWORD(callbr,      CallBr);

  INSTKEYWORD(alloca,      Alloca);
  INSTKEYWORD(load,        Load);
  INSTKEYWORD(store,       Store);
  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
  INSTKEYWORD(atomicrmw,   AtomicRMW);
  INSTKEYWORD(fence,       Fence);
  INSTKEYWORD(getelementptr, GetElementPtr);

  INSTKEYWORD(extractelement, ExtractElement);
  INSTKEYWORD(insertelement,  InsertElement);
  INSTKEYWORD(shufflevector,  ShuffleVector);
  INSTKEYWORD(extractvalue,   ExtractValue);
  INSTKEYWORD(insertvalue,    InsertValue);
  INSTKEYWORD(landingpad,     LandingPad);
  INSTKEYWORD(cleanupret,     CleanupRet);
  INSTKEYWORD(catchret,       CatchRet);
  INSTKEYWORD(catchswitch,  CatchSwitch);
  INSTKEYWORD(catchpad,     CatchPad);
  INSTKEYWORD(cleanuppad,   CleanupPad);

  INSTKEYWORD(freeze,       Freeze);

#undef INSTKEYWORD

  // DWKEYWORD(TYPE, TOKEN) matches any token starting with "DW_<TYPE>_",
  // stores the full token text in StrVal and returns lltok::TOKEN.
#define DWKEYWORD(TYPE, TOKEN)                                                 \
  do {                                                                         \
    if (Keyword.startswith("DW_" #TYPE "_")) {                                 \
      StrVal.assign(Keyword.begin(), Keyword.end());                           \
      return lltok::TOKEN;                                                     \
    }                                                                          \
  } while (false)

  DWKEYWORD(TAG, DwarfTag);
  DWKEYWORD(ATE, DwarfAttEncoding);
  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
  DWKEYWORD(LANG, DwarfLang);
  DWKEYWORD(CC, DwarfCC);
  DWKEYWORD(OP, DwarfOp);
  DWKEYWORD(MACINFO, DwarfMacinfo);

#undef DWKEYWORD

  // The remaining debug-info names are matched by prefix or exact spelling;
  // in every case the full token text is passed on in StrVal.
  if (Keyword.startswith("DIFlag")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::DIFlag;
  }

  if (Keyword.startswith("DISPFlag")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::DISPFlag;
  }

  if (Keyword.startswith("CSK_")) {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::ChecksumKind;
  }

  if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
      Keyword == "LineTablesOnly" || Keyword == "DebugDirectivesOnly") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::EmissionKind;
  }

  if (Keyword == "GNU" || Keyword == "None" || Keyword == "Default") {
    StrVal.assign(Keyword.begin(), Keyword.end());
    return lltok::NameTableKind;
  }

  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
  // the CFE to avoid forcing it to deal with 64-bit numbers.
  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
      TokStart[1] == '0' && TokStart[2] == 'x' &&
      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
    // Number of characters after the "u0x"/"s0x" prefix; each hex digit
    // contributes four bits.
    int len = CurPtr-TokStart-3;
    uint32_t bits = len * 4;
    StringRef HexStr(TokStart + 3, len);
    if (!all_of(HexStr, isxdigit)) {
      // Bad token, return it as an error.
      CurPtr = TokStart+3;
      return lltok::Error;
    }
    APInt Tmp(bits, HexStr, 16);
    // Drop leading zero bits, but keep the full width when the value is zero.
    uint32_t activeBits = Tmp.getActiveBits();
    if (activeBits > 0 && activeBits < bits)
      Tmp = Tmp.trunc(activeBits);
    // A 'u' prefix marks the constant as unsigned, 's' as signed.
    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
    return lltok::APSInt;
  }

  // If this is "cc1234", return this as just "cc".
  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
    // Leave CurPtr right after "cc" so the digits are lexed as the next token.
    CurPtr = TokStart+2;
    return lltok::kw_cc;
  }

  // Finally, if this isn't known, return an error.
  CurPtr = TokStart+1;
  return lltok::Error;
}
977
978/// Lex all tokens that start with a 0x prefix, knowing they match and are not
979/// labels.
980///    HexFPConstant     0x[0-9A-Fa-f]+
981///    HexFP80Constant   0xK[0-9A-Fa-f]+
982///    HexFP128Constant  0xL[0-9A-Fa-f]+
983///    HexPPC128Constant 0xM[0-9A-Fa-f]+
984///    HexHalfConstant   0xH[0-9A-Fa-f]+
985lltok::Kind LLLexer::Lex0x() {
986  CurPtr = TokStart + 2;
987
988  char Kind;
989  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
990    Kind = *CurPtr++;
991  } else {
992    Kind = 'J';
993  }
994
995  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
996    // Bad token, return it as an error.
997    CurPtr = TokStart+1;
998    return lltok::Error;
999  }
1000
1001  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
1002    ++CurPtr;
1003
1004  if (Kind == 'J') {
1005    // HexFPConstant - Floating point constant represented in IEEE format as a
1006    // hexadecimal number for when exponential notation is not precise enough.
1007    // Half, Float, and double only.
1008    APFloatVal = APFloat(APFloat::IEEEdouble(),
1009                         APInt(64, HexIntToVal(TokStart + 2, CurPtr)));
1010    return lltok::APFloat;
1011  }
1012
1013  uint64_t Pair[2];
1014  switch (Kind) {
1015  default: llvm_unreachable("Unknown kind!");
1016  case 'K':
1017    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
1018    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
1019    APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair));
1020    return lltok::APFloat;
1021  case 'L':
1022    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
1023    HexToIntPair(TokStart+3, CurPtr, Pair);
1024    APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair));
1025    return lltok::APFloat;
1026  case 'M':
1027    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
1028    HexToIntPair(TokStart+3, CurPtr, Pair);
1029    APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair));
1030    return lltok::APFloat;
1031  case 'H':
1032    APFloatVal = APFloat(APFloat::IEEEhalf(),
1033                         APInt(16,HexIntToVal(TokStart+3, CurPtr)));
1034    return lltok::APFloat;
1035  }
1036}
1037
1038/// Lex tokens for a label or a numeric constant, possibly starting with -.
1039///    Label             [-a-zA-Z$._0-9]+:
1040///    NInteger          -[0-9]+
1041///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1042///    PInteger          [0-9]+
1043///    HexFPConstant     0x[0-9A-Fa-f]+
1044///    HexFP80Constant   0xK[0-9A-Fa-f]+
1045///    HexFP128Constant  0xL[0-9A-Fa-f]+
1046///    HexPPC128Constant 0xM[0-9A-Fa-f]+
1047lltok::Kind LLLexer::LexDigitOrNegative() {
1048  // If the letter after the negative is not a number, this is probably a label.
1049  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
1050      !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
1051    // Okay, this is not a number after the -, it's probably a label.
1052    if (const char *End = isLabelTail(CurPtr)) {
1053      StrVal.assign(TokStart, End-1);
1054      CurPtr = End;
1055      return lltok::LabelStr;
1056    }
1057
1058    return lltok::Error;
1059  }
1060
1061  // At this point, it is either a label, int or fp constant.
1062
1063  // Skip digits, we have at least one.
1064  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1065    /*empty*/;
1066
1067  // Check if this is a fully-numeric label:
1068  if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
1069    uint64_t Val = atoull(TokStart, CurPtr);
1070    ++CurPtr; // Skip the colon.
1071    if ((unsigned)Val != Val)
1072      Error("invalid value number (too large)!");
1073    UIntVal = unsigned(Val);
1074    return lltok::LabelID;
1075  }
1076
1077  // Check to see if this really is a string label, e.g. "-1:".
1078  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
1079    if (const char *End = isLabelTail(CurPtr)) {
1080      StrVal.assign(TokStart, End-1);
1081      CurPtr = End;
1082      return lltok::LabelStr;
1083    }
1084  }
1085
1086  // If the next character is a '.', then it is a fp value, otherwise its
1087  // integer.
1088  if (CurPtr[0] != '.') {
1089    if (TokStart[0] == '0' && TokStart[1] == 'x')
1090      return Lex0x();
1091    APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
1092    return lltok::APSInt;
1093  }
1094
1095  ++CurPtr;
1096
1097  // Skip over [0-9]*([eE][-+]?[0-9]+)?
1098  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1099
1100  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1101    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1102        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1103          isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1104      CurPtr += 2;
1105      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1106    }
1107  }
1108
1109  APFloatVal = APFloat(APFloat::IEEEdouble(),
1110                       StringRef(TokStart, CurPtr - TokStart));
1111  return lltok::APFloat;
1112}
1113
1114/// Lex a floating point constant starting with +.
1115///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
1116lltok::Kind LLLexer::LexPositive() {
1117  // If the letter after the negative is a number, this is probably not a
1118  // label.
1119  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
1120    return lltok::Error;
1121
1122  // Skip digits.
1123  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
1124    /*empty*/;
1125
1126  // At this point, we need a '.'.
1127  if (CurPtr[0] != '.') {
1128    CurPtr = TokStart+1;
1129    return lltok::Error;
1130  }
1131
1132  ++CurPtr;
1133
1134  // Skip over [0-9]*([eE][-+]?[0-9]+)?
1135  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1136
1137  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
1138    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
1139        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
1140        isdigit(static_cast<unsigned char>(CurPtr[2])))) {
1141      CurPtr += 2;
1142      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
1143    }
1144  }
1145
1146  APFloatVal = APFloat(APFloat::IEEEdouble(),
1147                       StringRef(TokStart, CurPtr - TokStart));
1148  return lltok::APFloat;
1149}
1150