1#include <cctype> 2#include <cstdio> 3#include <cstdlib> 4#include <map> 5#include <memory> 6#include <string> 7#include <utility> 8#include <vector> 9 10//===----------------------------------------------------------------------===// 11// Lexer 12//===----------------------------------------------------------------------===// 13 14// The lexer returns tokens [0-255] if it is an unknown character, otherwise one 15// of these for known things. 16enum Token { 17 tok_eof = -1, 18 19 // commands 20 tok_def = -2, 21 tok_extern = -3, 22 23 // primary 24 tok_identifier = -4, 25 tok_number = -5 26}; 27 28static std::string IdentifierStr; // Filled in if tok_identifier 29static double NumVal; // Filled in if tok_number 30 31/// gettok - Return the next token from standard input. 32static int gettok() { 33 static int LastChar = ' '; 34 35 // Skip any whitespace. 36 while (isspace(LastChar)) 37 LastChar = getchar(); 38 39 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* 40 IdentifierStr = LastChar; 41 while (isalnum((LastChar = getchar()))) 42 IdentifierStr += LastChar; 43 44 if (IdentifierStr == "def") 45 return tok_def; 46 if (IdentifierStr == "extern") 47 return tok_extern; 48 return tok_identifier; 49 } 50 51 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ 52 std::string NumStr; 53 do { 54 NumStr += LastChar; 55 LastChar = getchar(); 56 } while (isdigit(LastChar) || LastChar == '.'); 57 58 NumVal = strtod(NumStr.c_str(), nullptr); 59 return tok_number; 60 } 61 62 if (LastChar == '#') { 63 // Comment until end of line. 64 do 65 LastChar = getchar(); 66 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); 67 68 if (LastChar != EOF) 69 return gettok(); 70 } 71 72 // Check for end of file. Don't eat the EOF. 73 if (LastChar == EOF) 74 return tok_eof; 75 76 // Otherwise, just return the character as its ascii value. 77 int ThisChar = LastChar; 78 LastChar = getchar(); 79 return ThisChar; 80} 81 82//===----------------------------------------------------------------------===// 83// Abstract Syntax Tree (aka Parse Tree) 84//===----------------------------------------------------------------------===// 85 86namespace { 87 88/// ExprAST - Base class for all expression nodes. 89class ExprAST { 90public: 91 virtual ~ExprAST() = default; 92}; 93 94/// NumberExprAST - Expression class for numeric literals like "1.0". 95class NumberExprAST : public ExprAST { 96 double Val; 97 98public: 99 NumberExprAST(double Val) : Val(Val) {} 100}; 101 102/// VariableExprAST - Expression class for referencing a variable, like "a". 103class VariableExprAST : public ExprAST { 104 std::string Name; 105 106public: 107 VariableExprAST(const std::string &Name) : Name(Name) {} 108}; 109 110/// BinaryExprAST - Expression class for a binary operator. 111class BinaryExprAST : public ExprAST { 112 char Op; 113 std::unique_ptr<ExprAST> LHS, RHS; 114 115public: 116 BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, 117 std::unique_ptr<ExprAST> RHS) 118 : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} 119}; 120 121/// CallExprAST - Expression class for function calls. 122class CallExprAST : public ExprAST { 123 std::string Callee; 124 std::vector<std::unique_ptr<ExprAST>> Args; 125 126public: 127 CallExprAST(const std::string &Callee, 128 std::vector<std::unique_ptr<ExprAST>> Args) 129 : Callee(Callee), Args(std::move(Args)) {} 130}; 131 132/// PrototypeAST - This class represents the "prototype" for a function, 133/// which captures its name, and its argument names (thus implicitly the number 134/// of arguments the function takes). 135class PrototypeAST { 136 std::string Name; 137 std::vector<std::string> Args; 138 139public: 140 PrototypeAST(const std::string &Name, std::vector<std::string> Args) 141 : Name(Name), Args(std::move(Args)) {} 142 143 const std::string &getName() const { return Name; } 144}; 145 146/// FunctionAST - This class represents a function definition itself. 147class FunctionAST { 148 std::unique_ptr<PrototypeAST> Proto; 149 std::unique_ptr<ExprAST> Body; 150 151public: 152 FunctionAST(std::unique_ptr<PrototypeAST> Proto, 153 std::unique_ptr<ExprAST> Body) 154 : Proto(std::move(Proto)), Body(std::move(Body)) {} 155}; 156 157} // end anonymous namespace 158 159//===----------------------------------------------------------------------===// 160// Parser 161//===----------------------------------------------------------------------===// 162 163/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current 164/// token the parser is looking at. getNextToken reads another token from the 165/// lexer and updates CurTok with its results. 166static int CurTok; 167static int getNextToken() { return CurTok = gettok(); } 168 169/// BinopPrecedence - This holds the precedence for each binary operator that is 170/// defined. 171static std::map<char, int> BinopPrecedence; 172 173/// GetTokPrecedence - Get the precedence of the pending binary operator token. 174static int GetTokPrecedence() { 175 if (!isascii(CurTok)) 176 return -1; 177 178 // Make sure it's a declared binop. 179 int TokPrec = BinopPrecedence[CurTok]; 180 if (TokPrec <= 0) 181 return -1; 182 return TokPrec; 183} 184 185/// LogError* - These are little helper functions for error handling. 186std::unique_ptr<ExprAST> LogError(const char *Str) { 187 fprintf(stderr, "Error: %s\n", Str); 188 return nullptr; 189} 190std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) { 191 LogError(Str); 192 return nullptr; 193} 194 195static std::unique_ptr<ExprAST> ParseExpression(); 196 197/// numberexpr ::= number 198static std::unique_ptr<ExprAST> ParseNumberExpr() { 199 auto Result = std::make_unique<NumberExprAST>(NumVal); 200 getNextToken(); // consume the number 201 return std::move(Result); 202} 203 204/// parenexpr ::= '(' expression ')' 205static std::unique_ptr<ExprAST> ParseParenExpr() { 206 getNextToken(); // eat (. 207 auto V = ParseExpression(); 208 if (!V) 209 return nullptr; 210 211 if (CurTok != ')') 212 return LogError("expected ')'"); 213 getNextToken(); // eat ). 214 return V; 215} 216 217/// identifierexpr 218/// ::= identifier 219/// ::= identifier '(' expression* ')' 220static std::unique_ptr<ExprAST> ParseIdentifierExpr() { 221 std::string IdName = IdentifierStr; 222 223 getNextToken(); // eat identifier. 224 225 if (CurTok != '(') // Simple variable ref. 226 return std::make_unique<VariableExprAST>(IdName); 227 228 // Call. 229 getNextToken(); // eat ( 230 std::vector<std::unique_ptr<ExprAST>> Args; 231 if (CurTok != ')') { 232 while (true) { 233 if (auto Arg = ParseExpression()) 234 Args.push_back(std::move(Arg)); 235 else 236 return nullptr; 237 238 if (CurTok == ')') 239 break; 240 241 if (CurTok != ',') 242 return LogError("Expected ')' or ',' in argument list"); 243 getNextToken(); 244 } 245 } 246 247 // Eat the ')'. 248 getNextToken(); 249 250 return std::make_unique<CallExprAST>(IdName, std::move(Args)); 251} 252 253/// primary 254/// ::= identifierexpr 255/// ::= numberexpr 256/// ::= parenexpr 257static std::unique_ptr<ExprAST> ParsePrimary() { 258 switch (CurTok) { 259 default: 260 return LogError("unknown token when expecting an expression"); 261 case tok_identifier: 262 return ParseIdentifierExpr(); 263 case tok_number: 264 return ParseNumberExpr(); 265 case '(': 266 return ParseParenExpr(); 267 } 268} 269 270/// binoprhs 271/// ::= ('+' primary)* 272static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, 273 std::unique_ptr<ExprAST> LHS) { 274 // If this is a binop, find its precedence. 275 while (true) { 276 int TokPrec = GetTokPrecedence(); 277 278 // If this is a binop that binds at least as tightly as the current binop, 279 // consume it, otherwise we are done. 280 if (TokPrec < ExprPrec) 281 return LHS; 282 283 // Okay, we know this is a binop. 284 int BinOp = CurTok; 285 getNextToken(); // eat binop 286 287 // Parse the primary expression after the binary operator. 288 auto RHS = ParsePrimary(); 289 if (!RHS) 290 return nullptr; 291 292 // If BinOp binds less tightly with RHS than the operator after RHS, let 293 // the pending operator take RHS as its LHS. 294 int NextPrec = GetTokPrecedence(); 295 if (TokPrec < NextPrec) { 296 RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); 297 if (!RHS) 298 return nullptr; 299 } 300 301 // Merge LHS/RHS. 302 LHS = 303 std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); 304 } 305} 306 307/// expression 308/// ::= primary binoprhs 309/// 310static std::unique_ptr<ExprAST> ParseExpression() { 311 auto LHS = ParsePrimary(); 312 if (!LHS) 313 return nullptr; 314 315 return ParseBinOpRHS(0, std::move(LHS)); 316} 317 318/// prototype 319/// ::= id '(' id* ')' 320static std::unique_ptr<PrototypeAST> ParsePrototype() { 321 if (CurTok != tok_identifier) 322 return LogErrorP("Expected function name in prototype"); 323 324 std::string FnName = IdentifierStr; 325 getNextToken(); 326 327 if (CurTok != '(') 328 return LogErrorP("Expected '(' in prototype"); 329 330 std::vector<std::string> ArgNames; 331 while (getNextToken() == tok_identifier) 332 ArgNames.push_back(IdentifierStr); 333 if (CurTok != ')') 334 return LogErrorP("Expected ')' in prototype"); 335 336 // success. 337 getNextToken(); // eat ')'. 338 339 return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames)); 340} 341 342/// definition ::= 'def' prototype expression 343static std::unique_ptr<FunctionAST> ParseDefinition() { 344 getNextToken(); // eat def. 345 auto Proto = ParsePrototype(); 346 if (!Proto) 347 return nullptr; 348 349 if (auto E = ParseExpression()) 350 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 351 return nullptr; 352} 353 354/// toplevelexpr ::= expression 355static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { 356 if (auto E = ParseExpression()) { 357 // Make an anonymous proto. 358 auto Proto = std::make_unique<PrototypeAST>("__anon_expr", 359 std::vector<std::string>()); 360 return std::make_unique<FunctionAST>(std::move(Proto), std::move(E)); 361 } 362 return nullptr; 363} 364 365/// external ::= 'extern' prototype 366static std::unique_ptr<PrototypeAST> ParseExtern() { 367 getNextToken(); // eat extern. 368 return ParsePrototype(); 369} 370 371//===----------------------------------------------------------------------===// 372// Top-Level parsing 373//===----------------------------------------------------------------------===// 374 375static void HandleDefinition() { 376 if (ParseDefinition()) { 377 fprintf(stderr, "Parsed a function definition.\n"); 378 } else { 379 // Skip token for error recovery. 380 getNextToken(); 381 } 382} 383 384static void HandleExtern() { 385 if (ParseExtern()) { 386 fprintf(stderr, "Parsed an extern\n"); 387 } else { 388 // Skip token for error recovery. 389 getNextToken(); 390 } 391} 392 393static void HandleTopLevelExpression() { 394 // Evaluate a top-level expression into an anonymous function. 395 if (ParseTopLevelExpr()) { 396 fprintf(stderr, "Parsed a top-level expr\n"); 397 } else { 398 // Skip token for error recovery. 399 getNextToken(); 400 } 401} 402 403/// top ::= definition | external | expression | ';' 404static void MainLoop() { 405 while (true) { 406 fprintf(stderr, "ready> "); 407 switch (CurTok) { 408 case tok_eof: 409 return; 410 case ';': // ignore top-level semicolons. 411 getNextToken(); 412 break; 413 case tok_def: 414 HandleDefinition(); 415 break; 416 case tok_extern: 417 HandleExtern(); 418 break; 419 default: 420 HandleTopLevelExpression(); 421 break; 422 } 423 } 424} 425 426//===----------------------------------------------------------------------===// 427// Main driver code. 428//===----------------------------------------------------------------------===// 429 430int main() { 431 // Install standard binary operators. 432 // 1 is lowest precedence. 433 BinopPrecedence['<'] = 10; 434 BinopPrecedence['+'] = 20; 435 BinopPrecedence['-'] = 20; 436 BinopPrecedence['*'] = 40; // highest. 437 438 // Prime the first token. 439 fprintf(stderr, "ready> "); 440 getNextToken(); 441 442 // Run the main "interpreter loop" now. 443 MainLoop(); 444 445 return 0; 446} 447