LexerParser.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. #include "llvm/ADT/STLExtras.h"
  2. #include "llvm/IR/IRBuilder.h"
  3. #include "llvm/IR/LLVMContext.h"
  4. #include "llvm/IR/Module.h"
  5. #include "llvm/IR/Verifier.h"
  6. #include <cctype>
  7. #include <cstdio>
  8. #include <map>
  9. #include <string>
  10. #include <vector>
  11. #include "Ast.h"
  12. using namespace llvm;
  13. using namespace ast;
  14. namespace helper {
  15. // Cloning make_unique here until it's standard in C++14.
  16. // Using a namespace to avoid conflicting with MSVC's std::make_unique (which
  17. // ADL can sometimes find in unqualified calls).
  18. template <class T, class... Args>
  19. static
  20. typename std::enable_if<!std::is_array<T>::value, std::unique_ptr<T>>::type
  21. make_unique(Args &&... args) {
  22. return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
  23. }
  24. }
  25. //===----------------------------------------------------------------------===//
  26. // Lexer
  27. //===----------------------------------------------------------------------===//
  28. // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
  29. // of these for known things.
  30. enum Token {
  31. tok_eof = -1,
  32. // commands
  33. tok_def = -2,
  34. tok_extern = -3,
  35. // primary
  36. tok_identifier = -4,
  37. tok_number = -5
  38. };
  39. static std::string IdentifierStr; // Filled in if tok_identifier
  40. static double NumVal; // Filled in if tok_number
  41. /// gettok - Return the next token from standard input.
  42. static int gettok() {
  43. static int LastChar = ' ';
  44. // Skip any whitespace.
  45. while (isspace(LastChar))
  46. LastChar = getchar();
  47. if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
  48. IdentifierStr = LastChar;
  49. while (isalnum((LastChar = getchar())))
  50. IdentifierStr += LastChar;
  51. if (IdentifierStr == "def")
  52. return tok_def;
  53. if (IdentifierStr == "extern")
  54. return tok_extern;
  55. return tok_identifier;
  56. }
  57. if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
  58. std::string NumStr;
  59. do {
  60. NumStr += LastChar;
  61. LastChar = getchar();
  62. } while (isdigit(LastChar) || LastChar == '.');
  63. NumVal = strtod(NumStr.c_str(), nullptr);
  64. return tok_number;
  65. }
  66. if (LastChar == '#') {
  67. // Comment until end of line.
  68. do
  69. LastChar = getchar();
  70. while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
  71. if (LastChar != EOF)
  72. return gettok();
  73. }
  74. // Check for end of file. Don't eat the EOF.
  75. if (LastChar == EOF)
  76. return tok_eof;
  77. // Otherwise, just return the character as its ascii value.
  78. int ThisChar = LastChar;
  79. LastChar = getchar();
  80. return ThisChar;
  81. }
  82. //===----------------------------------------------------------------------===//
  83. // Parser
  84. //===----------------------------------------------------------------------===//
  85. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  86. /// token the parser is looking at. getNextToken reads another token from the
  87. /// lexer and updates CurTok with its results.
  88. static int CurTok;
  89. static int getNextToken() { return CurTok = gettok(); }
  90. /// BinopPrecedence - This holds the precedence for each binary operator that is
  91. /// defined.
  92. static std::map<char, int> BinopPrecedence;
  93. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  94. static int GetTokPrecedence() {
  95. if (!isascii(CurTok))
  96. return -1;
  97. // Make sure it's a declared binop.
  98. int TokPrec = BinopPrecedence[CurTok];
  99. if (TokPrec <= 0)
  100. return -1;
  101. return TokPrec;
  102. }
  103. /// Error* - These are little helper functions for error handling.
  104. std::unique_ptr<ExprAST> Error(const char *Str) {
  105. fprintf(stderr, "Error: %s\n", Str);
  106. return nullptr;
  107. }
  108. std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
  109. Error(Str);
  110. return nullptr;
  111. }
  112. static std::unique_ptr<ExprAST> ParseExpression();
  113. /// numberexpr ::= number
  114. static std::unique_ptr<ExprAST> ParseNumberExpr() {
  115. auto Result = helper::make_unique<NumberExprAST>(NumVal);
  116. getNextToken(); // consume the number
  117. return std::move(Result);
  118. }
  119. /// parenexpr ::= '(' expression ')'
  120. static std::unique_ptr<ExprAST> ParseParenExpr() {
  121. getNextToken(); // eat (.
  122. auto V = ParseExpression();
  123. if (!V)
  124. return nullptr;
  125. if (CurTok != ')')
  126. return Error("expected ')'");
  127. getNextToken(); // eat ).
  128. return V;
  129. }
  130. /// identifierexpr
  131. /// ::= identifier
  132. /// ::= identifier '(' expression* ')'
  133. static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
  134. std::string IdName = IdentifierStr;
  135. getNextToken(); // eat identifier.
  136. if (CurTok != '(') // Simple variable ref.
  137. return helper::make_unique<VariableExprAST>(IdName);
  138. // Call.
  139. getNextToken(); // eat (
  140. std::vector<std::unique_ptr<ExprAST>> Args;
  141. if (CurTok != ')') {
  142. while (1) {
  143. if (auto Arg = ParseExpression())
  144. Args.push_back(std::move(Arg));
  145. else
  146. return nullptr;
  147. if (CurTok == ')')
  148. break;
  149. if (CurTok != ',')
  150. return Error("Expected ')' or ',' in argument list");
  151. getNextToken();
  152. }
  153. }
  154. // Eat the ')'.
  155. getNextToken();
  156. return helper::make_unique<CallExprAST>(IdName, std::move(Args));
  157. }
  158. /// primary
  159. /// ::= identifierexpr
  160. /// ::= numberexpr
  161. /// ::= parenexpr
  162. static std::unique_ptr<ExprAST> ParsePrimary() {
  163. switch (CurTok) {
  164. default:
  165. return Error("unknown token when expecting an expression");
  166. case tok_identifier:
  167. return ParseIdentifierExpr();
  168. case tok_number:
  169. return ParseNumberExpr();
  170. case '(':
  171. return ParseParenExpr();
  172. }
  173. }
  174. /// binoprhs
  175. /// ::= ('+' primary)*
  176. static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
  177. std::unique_ptr<ExprAST> LHS) {
  178. // If this is a binop, find its precedence.
  179. while (1) {
  180. int TokPrec = GetTokPrecedence();
  181. // If this is a binop that binds at least as tightly as the current binop,
  182. // consume it, otherwise we are done.
  183. if (TokPrec < ExprPrec)
  184. return LHS;
  185. // Okay, we know this is a binop.
  186. int BinOp = CurTok;
  187. getNextToken(); // eat binop
  188. // Parse the primary expression after the binary operator.
  189. auto RHS = ParsePrimary();
  190. if (!RHS)
  191. return nullptr;
  192. // If BinOp binds less tightly with RHS than the operator after RHS, let
  193. // the pending operator take RHS as its LHS.
  194. int NextPrec = GetTokPrecedence();
  195. if (TokPrec < NextPrec) {
  196. RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
  197. if (!RHS)
  198. return nullptr;
  199. }
  200. // Merge LHS/RHS.
  201. LHS = helper::make_unique<BinaryExprAST>(BinOp, std::move(LHS),
  202. std::move(RHS));
  203. }
  204. }
  205. /// expression
  206. /// ::= primary binoprhs
  207. ///
  208. static std::unique_ptr<ExprAST> ParseExpression() {
  209. auto LHS = ParsePrimary();
  210. if (!LHS)
  211. return nullptr;
  212. return ParseBinOpRHS(0, std::move(LHS));
  213. }
  214. /// prototype
  215. /// ::= id '(' id* ')'
  216. static std::unique_ptr<PrototypeAST> ParsePrototype() {
  217. if (CurTok != tok_identifier)
  218. return ErrorP("Expected function name in prototype");
  219. std::string FnName = IdentifierStr;
  220. getNextToken();
  221. if (CurTok != '(')
  222. return ErrorP("Expected '(' in prototype");
  223. std::vector<std::string> ArgNames;
  224. while (getNextToken() == tok_identifier)
  225. ArgNames.push_back(IdentifierStr);
  226. if (CurTok != ')')
  227. return ErrorP("Expected ')' in prototype");
  228. // success.
  229. getNextToken(); // eat ')'.
  230. return helper::make_unique<PrototypeAST>(FnName, std::move(ArgNames));
  231. }
  232. /// definition ::= 'def' prototype expression
  233. static std::unique_ptr<FunctionAST> ParseDefinition() {
  234. getNextToken(); // eat def.
  235. auto Proto = ParsePrototype();
  236. if (!Proto)
  237. return nullptr;
  238. if (auto E = ParseExpression())
  239. return helper::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  240. return nullptr;
  241. }
  242. /// toplevelexpr ::= expression
  243. static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
  244. if (auto E = ParseExpression()) {
  245. // Make an anonymous proto.
  246. auto Proto = helper::make_unique<PrototypeAST>("__anon_expr",
  247. std::vector<std::string>());
  248. return helper::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  249. }
  250. return nullptr;
  251. }
  252. /// external ::= 'extern' prototype
  253. static std::unique_ptr<PrototypeAST> ParseExtern() {
  254. getNextToken(); // eat extern.
  255. return ParsePrototype();
  256. }
  257. //===----------------------------------------------------------------------===//
  258. // Top-Level parsing
  259. //===----------------------------------------------------------------------===//
  260. static void HandleDefinition() {
  261. if (auto FnAST = ParseDefinition()) {
  262. if (auto *FnIR = FnAST->codegen()) {
  263. fprintf(stderr, "Read function definition:");
  264. FnIR->dump();
  265. }
  266. } else {
  267. // Skip token for error recovery.
  268. getNextToken();
  269. }
  270. }
  271. static void HandleExtern() {
  272. if (auto ProtoAST = ParseExtern()) {
  273. if (auto *FnIR = ProtoAST->codegen()) {
  274. fprintf(stderr, "Read extern: ");
  275. FnIR->dump();
  276. }
  277. } else {
  278. // Skip token for error recovery.
  279. getNextToken();
  280. }
  281. }
  282. static void HandleTopLevelExpression() {
  283. // Evaluate a top-level expression into an anonymous function.
  284. if (auto FnAST = ParseTopLevelExpr()) {
  285. if (auto *FnIR = FnAST->codegen()) {
  286. fprintf(stderr, "Read top-level expression:");
  287. FnIR->dump();
  288. }
  289. } else {
  290. // Skip token for error recovery.
  291. getNextToken();
  292. }
  293. }
  294. /// top ::= definition | external | expression | ';'
  295. static void MainLoop() {
  296. while (1) {
  297. fprintf(stderr, "ready> ");
  298. switch (CurTok) {
  299. case tok_eof:
  300. return;
  301. case ';': // ignore top-level semicolons.
  302. getNextToken();
  303. break;
  304. case tok_def:
  305. HandleDefinition();
  306. break;
  307. case tok_extern:
  308. HandleExtern();
  309. break;
  310. default:
  311. HandleTopLevelExpression();
  312. break;
  313. }
  314. }
  315. }
  316. //===----------------------------------------------------------------------===//
  317. // Main driver code.
  318. //===----------------------------------------------------------------------===//
  319. int main() {
  320. // Install standard binary operators.
  321. // 1 is lowest precedence.
  322. BinopPrecedence['<'] = 10;
  323. BinopPrecedence['+'] = 20;
  324. BinopPrecedence['-'] = 20;
  325. BinopPrecedence['*'] = 40; // highest.
  326. // Prime the first token.
  327. fprintf(stderr, "ready> ");
  328. getNextToken();
  329. // Run the main "interpreter loop" now.
  330. MainLoop();
  331. AstTree::TheModule->dump();
  332. return 0;
  333. }