1#include <cstdio>
2#include <cstdlib>
3#include <string>
4#include <map>
5#include <vector>
6
7//===----------------------------------------------------------------------===//
8// Lexer
9//===----------------------------------------------------------------------===//
10
11// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
12// of these for known things.
13enum Token {
14  tok_eof = -1,
15
16  // commands
17  tok_def = -2, tok_extern = -3,
18
19  // primary
20  tok_identifier = -4, tok_number = -5
21};
22
23static std::string IdentifierStr;  // Filled in if tok_identifier
24static double NumVal;              // Filled in if tok_number
25
26/// gettok - Return the next token from standard input.
27static int gettok() {
28  static int LastChar = ' ';
29
30  // Skip any whitespace.
31  while (isspace(LastChar))
32    LastChar = getchar();
33
34  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
35    IdentifierStr = LastChar;
36    while (isalnum((LastChar = getchar())))
37      IdentifierStr += LastChar;
38
39    if (IdentifierStr == "def") return tok_def;
40    if (IdentifierStr == "extern") return tok_extern;
41    return tok_identifier;
42  }
43
44  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
45    std::string NumStr;
46    do {
47      NumStr += LastChar;
48      LastChar = getchar();
49    } while (isdigit(LastChar) || LastChar == '.');
50
51    NumVal = strtod(NumStr.c_str(), 0);
52    return tok_number;
53  }
54
55  if (LastChar == '#') {
56    // Comment until end of line.
57    do LastChar = getchar();
58    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
59
60    if (LastChar != EOF)
61      return gettok();
62  }
63
64  // Check for end of file.  Don't eat the EOF.
65  if (LastChar == EOF)
66    return tok_eof;
67
68  // Otherwise, just return the character as its ascii value.
69  int ThisChar = LastChar;
70  LastChar = getchar();
71  return ThisChar;
72}
73
74//===----------------------------------------------------------------------===//
75// Abstract Syntax Tree (aka Parse Tree)
76//===----------------------------------------------------------------------===//
77
78/// ExprAST - Base class for all expression nodes.
79class ExprAST {
80public:
81  virtual ~ExprAST() {}
82};
83
84/// NumberExprAST - Expression class for numeric literals like "1.0".
85class NumberExprAST : public ExprAST {
86  double Val;
87public:
88  NumberExprAST(double val) : Val(val) {}
89};
90
91/// VariableExprAST - Expression class for referencing a variable, like "a".
92class VariableExprAST : public ExprAST {
93  std::string Name;
94public:
95  VariableExprAST(const std::string &name) : Name(name) {}
96};
97
98/// BinaryExprAST - Expression class for a binary operator.
99class BinaryExprAST : public ExprAST {
100  char Op;
101  ExprAST *LHS, *RHS;
102public:
103  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
104    : Op(op), LHS(lhs), RHS(rhs) {}
105};
106
107/// CallExprAST - Expression class for function calls.
108class CallExprAST : public ExprAST {
109  std::string Callee;
110  std::vector<ExprAST*> Args;
111public:
112  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
113    : Callee(callee), Args(args) {}
114};
115
116/// PrototypeAST - This class represents the "prototype" for a function,
117/// which captures its name, and its argument names (thus implicitly the number
118/// of arguments the function takes).
119class PrototypeAST {
120  std::string Name;
121  std::vector<std::string> Args;
122public:
123  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
124    : Name(name), Args(args) {}
125
126};
127
128/// FunctionAST - This class represents a function definition itself.
129class FunctionAST {
130  PrototypeAST *Proto;
131  ExprAST *Body;
132public:
133  FunctionAST(PrototypeAST *proto, ExprAST *body)
134    : Proto(proto), Body(body) {}
135
136};
137
138//===----------------------------------------------------------------------===//
139// Parser
140//===----------------------------------------------------------------------===//
141
142/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
143/// token the parser is looking at.  getNextToken reads another token from the
144/// lexer and updates CurTok with its results.
145static int CurTok;
146static int getNextToken() {
147  return CurTok = gettok();
148}
149
150/// BinopPrecedence - This holds the precedence for each binary operator that is
151/// defined.
152static std::map<char, int> BinopPrecedence;
153
154/// GetTokPrecedence - Get the precedence of the pending binary operator token.
155static int GetTokPrecedence() {
156  if (!isascii(CurTok))
157    return -1;
158
159  // Make sure it's a declared binop.
160  int TokPrec = BinopPrecedence[CurTok];
161  if (TokPrec <= 0) return -1;
162  return TokPrec;
163}
164
165/// Error* - These are little helper functions for error handling.
166ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
167PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
168FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
169
170static ExprAST *ParseExpression();
171
172/// identifierexpr
173///   ::= identifier
174///   ::= identifier '(' expression* ')'
175static ExprAST *ParseIdentifierExpr() {
176  std::string IdName = IdentifierStr;
177
178  getNextToken();  // eat identifier.
179
180  if (CurTok != '(') // Simple variable ref.
181    return new VariableExprAST(IdName);
182
183  // Call.
184  getNextToken();  // eat (
185  std::vector<ExprAST*> Args;
186  if (CurTok != ')') {
187    while (1) {
188      ExprAST *Arg = ParseExpression();
189      if (!Arg) return 0;
190      Args.push_back(Arg);
191
192      if (CurTok == ')') break;
193
194      if (CurTok != ',')
195        return Error("Expected ')' or ',' in argument list");
196      getNextToken();
197    }
198  }
199
200  // Eat the ')'.
201  getNextToken();
202
203  return new CallExprAST(IdName, Args);
204}
205
206/// numberexpr ::= number
207static ExprAST *ParseNumberExpr() {
208  ExprAST *Result = new NumberExprAST(NumVal);
209  getNextToken(); // consume the number
210  return Result;
211}
212
213/// parenexpr ::= '(' expression ')'
214static ExprAST *ParseParenExpr() {
215  getNextToken();  // eat (.
216  ExprAST *V = ParseExpression();
217  if (!V) return 0;
218
219  if (CurTok != ')')
220    return Error("expected ')'");
221  getNextToken();  // eat ).
222  return V;
223}
224
225/// primary
226///   ::= identifierexpr
227///   ::= numberexpr
228///   ::= parenexpr
229static ExprAST *ParsePrimary() {
230  switch (CurTok) {
231  default: return Error("unknown token when expecting an expression");
232  case tok_identifier: return ParseIdentifierExpr();
233  case tok_number:     return ParseNumberExpr();
234  case '(':            return ParseParenExpr();
235  }
236}
237
238/// binoprhs
239///   ::= ('+' primary)*
240static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
241  // If this is a binop, find its precedence.
242  while (1) {
243    int TokPrec = GetTokPrecedence();
244
245    // If this is a binop that binds at least as tightly as the current binop,
246    // consume it, otherwise we are done.
247    if (TokPrec < ExprPrec)
248      return LHS;
249
250    // Okay, we know this is a binop.
251    int BinOp = CurTok;
252    getNextToken();  // eat binop
253
254    // Parse the primary expression after the binary operator.
255    ExprAST *RHS = ParsePrimary();
256    if (!RHS) return 0;
257
258    // If BinOp binds less tightly with RHS than the operator after RHS, let
259    // the pending operator take RHS as its LHS.
260    int NextPrec = GetTokPrecedence();
261    if (TokPrec < NextPrec) {
262      RHS = ParseBinOpRHS(TokPrec+1, RHS);
263      if (RHS == 0) return 0;
264    }
265
266    // Merge LHS/RHS.
267    LHS = new BinaryExprAST(BinOp, LHS, RHS);
268  }
269}
270
271/// expression
272///   ::= primary binoprhs
273///
274static ExprAST *ParseExpression() {
275  ExprAST *LHS = ParsePrimary();
276  if (!LHS) return 0;
277
278  return ParseBinOpRHS(0, LHS);
279}
280
281/// prototype
282///   ::= id '(' id* ')'
283static PrototypeAST *ParsePrototype() {
284  if (CurTok != tok_identifier)
285    return ErrorP("Expected function name in prototype");
286
287  std::string FnName = IdentifierStr;
288  getNextToken();
289
290  if (CurTok != '(')
291    return ErrorP("Expected '(' in prototype");
292
293  std::vector<std::string> ArgNames;
294  while (getNextToken() == tok_identifier)
295    ArgNames.push_back(IdentifierStr);
296  if (CurTok != ')')
297    return ErrorP("Expected ')' in prototype");
298
299  // success.
300  getNextToken();  // eat ')'.
301
302  return new PrototypeAST(FnName, ArgNames);
303}
304
305/// definition ::= 'def' prototype expression
306static FunctionAST *ParseDefinition() {
307  getNextToken();  // eat def.
308  PrototypeAST *Proto = ParsePrototype();
309  if (Proto == 0) return 0;
310
311  if (ExprAST *E = ParseExpression())
312    return new FunctionAST(Proto, E);
313  return 0;
314}
315
316/// toplevelexpr ::= expression
317static FunctionAST *ParseTopLevelExpr() {
318  if (ExprAST *E = ParseExpression()) {
319    // Make an anonymous proto.
320    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
321    return new FunctionAST(Proto, E);
322  }
323  return 0;
324}
325
326/// external ::= 'extern' prototype
327static PrototypeAST *ParseExtern() {
328  getNextToken();  // eat extern.
329  return ParsePrototype();
330}
331
332//===----------------------------------------------------------------------===//
333// Top-Level parsing
334//===----------------------------------------------------------------------===//
335
336static void HandleDefinition() {
337  if (ParseDefinition()) {
338    fprintf(stderr, "Parsed a function definition.\n");
339  } else {
340    // Skip token for error recovery.
341    getNextToken();
342  }
343}
344
345static void HandleExtern() {
346  if (ParseExtern()) {
347    fprintf(stderr, "Parsed an extern\n");
348  } else {
349    // Skip token for error recovery.
350    getNextToken();
351  }
352}
353
354static void HandleTopLevelExpression() {
355  // Evaluate a top-level expression into an anonymous function.
356  if (ParseTopLevelExpr()) {
357    fprintf(stderr, "Parsed a top-level expr\n");
358  } else {
359    // Skip token for error recovery.
360    getNextToken();
361  }
362}
363
364/// top ::= definition | external | expression | ';'
365static void MainLoop() {
366  while (1) {
367    fprintf(stderr, "ready> ");
368    switch (CurTok) {
369    case tok_eof:    return;
370    case ';':        getNextToken(); break;  // ignore top-level semicolons.
371    case tok_def:    HandleDefinition(); break;
372    case tok_extern: HandleExtern(); break;
373    default:         HandleTopLevelExpression(); break;
374    }
375  }
376}
377
378//===----------------------------------------------------------------------===//
379// Main driver code.
380//===----------------------------------------------------------------------===//
381
382int main() {
383  // Install standard binary operators.
384  // 1 is lowest precedence.
385  BinopPrecedence['<'] = 10;
386  BinopPrecedence['+'] = 20;
387  BinopPrecedence['-'] = 20;
388  BinopPrecedence['*'] = 40;  // highest.
389
390  // Prime the first token.
391  fprintf(stderr, "ready> ");
392  getNextToken();
393
394  // Run the main "interpreter loop" now.
395  MainLoop();
396
397  return 0;
398}
399