1//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This class represents the Lexer for tablegen files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef TGLEXER_H
15#define TGLEXER_H
16
17#include "llvm/Support/DataTypes.h"
18#include <string>
19#include <vector>
20#include <cassert>
21
22namespace llvm {
23class MemoryBuffer;
24class SourceMgr;
25class SMLoc;
26class Twine;
27
namespace tgtok {
  /// TokKind - The kinds of token the TableGen lexer can report.
  ///
  /// Enumerators are grouped by the kind of payload they carry.  The numeric
  /// values are assigned implicitly, so the declaration order below is
  /// significant and must not be rearranged.
  enum TokKind {
    // Markers.
    Eof,
    Error,

    // Tokens that carry no additional information.
    minus,        // -
    plus,         // +
    l_square,     // [
    r_square,     // ]
    l_brace,      // {
    r_brace,      // }
    l_paren,      // (
    r_paren,      // )
    less,         // <
    greater,      // >
    colon,        // :
    semi,         // ;
    comma,        // ,
    period,       // .
    equal,        // =
    question,     // ?
    paste,        // #

    // Keywords.
    Bit,
    Bits,
    Class,
    Code,
    Dag,
    Def,
    Foreach,
    Defm,
    Field,
    In,
    Int,
    Let,
    List,
    MultiClass,
    String,

    // !keywords.
    XConcat,
    XSRA,
    XSRL,
    XSHL,
    XStrConcat,
    XCast,
    XSubst,
    XForEach,
    XHead,
    XTail,
    XEmpty,
    XIf,
    XEq,

    // Token that carries an integer value.
    IntVal,

    // Tokens that carry a string value.
    Id,
    StrVal,
    VarName,
    CodeFragment
  };
}
59
60/// TGLexer - TableGen Lexer class.
61class TGLexer {
62  SourceMgr &SrcMgr;
63
64  const char *CurPtr;
65  const MemoryBuffer *CurBuf;
66
67  // Information about the current token.
68  const char *TokStart;
69  tgtok::TokKind CurCode;
70  std::string CurStrVal;  // This is valid for ID, STRVAL, VARNAME, CODEFRAGMENT
71  int64_t CurIntVal;      // This is valid for INTVAL.
72
73  /// CurBuffer - This is the current buffer index we're lexing from as managed
74  /// by the SourceMgr object.
75  int CurBuffer;
76  /// Dependencies - This is the list of all included files.
77  std::vector<std::string> Dependencies;
78
79public:
80  TGLexer(SourceMgr &SrcMgr);
81  ~TGLexer() {}
82
83  tgtok::TokKind Lex() {
84    return CurCode = LexToken();
85  }
86
87  const std::vector<std::string> &getDependencies() const {
88    return Dependencies;
89  }
90
91  tgtok::TokKind getCode() const { return CurCode; }
92
93  const std::string &getCurStrVal() const {
94    assert((CurCode == tgtok::Id || CurCode == tgtok::StrVal ||
95            CurCode == tgtok::VarName || CurCode == tgtok::CodeFragment) &&
96           "This token doesn't have a string value");
97    return CurStrVal;
98  }
99  int64_t getCurIntVal() const {
100    assert(CurCode == tgtok::IntVal && "This token isn't an integer");
101    return CurIntVal;
102  }
103
104  SMLoc getLoc() const;
105
106private:
107  /// LexToken - Read the next token and return its code.
108  tgtok::TokKind LexToken();
109
110  tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
111
112  int getNextChar();
113  int peekNextChar(int Index);
114  void SkipBCPLComment();
115  bool SkipCComment();
116  tgtok::TokKind LexIdentifier();
117  bool LexInclude();
118  tgtok::TokKind LexString();
119  tgtok::TokKind LexVarName();
120  tgtok::TokKind LexNumber();
121  tgtok::TokKind LexBracket();
122  tgtok::TokKind LexExclaim();
123};
124
125} // end namespace llvm
126
127#endif
128