Parser.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. // Standard includes
  2. #include <cassert>
  3. #include <cstdint>
  4. #include <cstdio>
  5. #include <stdexcept>
  6. //LLVM includes
  7. // Local includes
  8. #include "Parser.h"
  9. #include "Lexer.h"
  10. #include "JIT.h"
  11. using namespace lexer;
  12. using namespace jit;
  13. namespace parser{
  /// putchard - Write the character whose code is X (converted to `char`) to
  /// stderr. Declared with C linkage. Always returns 0.
  extern "C" double putchard(double X) {
    const char Ch = static_cast<char>(X);
    fputc(Ch, stderr);
    return 0.0;
  }
  /// printd - Print X to stderr using the "%f\n" format. Declared with C
  /// linkage. Always returns 0.
  extern "C" double printd(double X) {
    const double Value = X;
    fprintf(stderr, "%f\n", Value);
    return 0.0;
  }
  24. //===----------------------------------------------------------------------===//
  25. // Parser
  26. //===----------------------------------------------------------------------===//
  27. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  28. /// token the parser is looking at. getNextToken reads another token from the
  29. /// lexer and updates CurTok with its results.
  30. int getNextToken() { return CurTok = gettok(); }
  31. /// BinopPrecedence - This holds the precedence for each binary operator that is
  32. /// defined.
  33. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  34. static int GetTokPrecedence() {
  35. if (!isascii(CurTok))
  36. return -1;
  37. // Make sure it's a declared binop.
  38. int TokPrec = BinopPrecedence[CurTok];
  39. if (TokPrec <= 0)
  40. return -1;
  41. return TokPrec;
  42. }
  43. /// Error* - These are little helper functions for error handling.
  44. std::unique_ptr<ExprAST> Error(const char *Str) {
  45. fprintf(stderr, "Error: %s\n", Str);
  46. return nullptr;
  47. }
  48. std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
  49. Error(Str);
  50. return nullptr;
  51. }
  52. static std::unique_ptr<ExprAST> ParseExpression();
  53. /// numberexpr ::= number
  54. static std::unique_ptr<ExprAST> ParseNumberExpr() {
  55. auto Result = llvm::make_unique<NumberExprAST>(LexerObjects::NumVal);
  56. getNextToken(); // consume the number
  57. return std::move(Result);
  58. }
  59. /// parenexpr ::= '(' expression ')'
  60. static std::unique_ptr<ExprAST> ParseParenExpr() {
  61. getNextToken(); // eat (.
  62. auto V = ParseExpression();
  63. if (!V)
  64. return nullptr;
  65. if (CurTok != ')')
  66. return Error("expected ')'");
  67. getNextToken(); // eat ).
  68. return V;
  69. }
  70. /// identifierexpr
  71. /// ::= identifier
  72. /// ::= identifier '(' expression* ')'
  73. static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
  74. std::string IdName = LexerObjects::IdentifierStr;
  75. getNextToken(); // eat identifier.
  76. if (CurTok != '(') // Simple variable ref.
  77. return llvm::make_unique<VariableExprAST>(IdName);
  78. // Call.
  79. getNextToken(); // eat (
  80. std::vector<std::unique_ptr<ExprAST>> Args;
  81. if (CurTok != ')') {
  82. while (1) {
  83. if (auto Arg = ParseExpression())
  84. Args.push_back(std::move(Arg));
  85. else
  86. return nullptr;
  87. if (CurTok == ')')
  88. break;
  89. if (CurTok != ',')
  90. return Error("Expected ')' or ',' in argument list");
  91. getNextToken();
  92. }
  93. }
  94. // Eat the ')'.
  95. getNextToken();
  96. return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
  97. }
  98. /// ifexpr ::= 'if' expression 'then' expression 'else' expression
  99. static std::unique_ptr<ExprAST> ParseIfExpr() {
  100. getNextToken(); // eat the if.
  101. // condition.
  102. auto Cond = ParseExpression();
  103. if (!Cond)
  104. return nullptr;
  105. if (CurTok != tok_then)
  106. return Error("expected then");
  107. getNextToken(); // eat the then
  108. auto Then = ParseExpression();
  109. if (!Then)
  110. return nullptr;
  111. if (CurTok != tok_else)
  112. return Error("expected else");
  113. getNextToken();
  114. auto Else = ParseExpression();
  115. if (!Else)
  116. return nullptr;
  117. return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
  118. std::move(Else));
  119. }
  120. /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
  121. static std::unique_ptr<ExprAST> ParseForExpr() {
  122. getNextToken(); // eat the for.
  123. if (CurTok != tok_identifier)
  124. return Error("expected identifier after for");
  125. std::string IdName = LexerObjects::IdentifierStr;
  126. getNextToken(); // eat identifier.
  127. if (CurTok != '=')
  128. return Error("expected '=' after for");
  129. getNextToken(); // eat '='.
  130. auto Start = ParseExpression();
  131. if (!Start)
  132. return nullptr;
  133. if (CurTok != ',')
  134. return Error("expected ',' after for start value");
  135. getNextToken();
  136. auto End = ParseExpression();
  137. if (!End)
  138. return nullptr;
  139. // The step value is optional.
  140. std::unique_ptr<ExprAST> Step;
  141. if (CurTok == ',') {
  142. getNextToken();
  143. Step = ParseExpression();
  144. if (!Step)
  145. return nullptr;
  146. }
  147. if (CurTok != tok_in)
  148. return Error("expected 'in' after for");
  149. getNextToken(); // eat 'in'.
  150. auto Body = ParseExpression();
  151. if (!Body)
  152. return nullptr;
  153. return llvm::make_unique<ForExprAST>(IdName, std::move(Start),
  154. std::move(End), std::move(Step),
  155. std::move(Body));
  156. }
  157. /// varexpr ::= 'var' identifier ('=' expression)?
  158. // (',' identifier ('=' expression)?)* 'in' expression
  159. static std::unique_ptr<ExprAST> ParseVarExpr() {
  160. getNextToken(); // eat the var.
  161. std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
  162. // At least one variable name is required.
  163. if (CurTok != tok_identifier)
  164. return Error("expected identifier after var");
  165. while (1) {
  166. std::string Name = LexerObjects::IdentifierStr;
  167. getNextToken(); // eat identifier.
  168. // Read the optional initializer.
  169. std::unique_ptr<ExprAST> Init = nullptr;
  170. if (CurTok == '=') {
  171. getNextToken(); // eat the '='.
  172. Init = ParseExpression();
  173. if (!Init)
  174. return nullptr;
  175. }
  176. VarNames.push_back(std::make_pair(Name, std::move(Init)));
  177. // End of var list, exit loop.
  178. if (CurTok != ',')
  179. break;
  180. getNextToken(); // eat the ','.
  181. if (CurTok != tok_identifier)
  182. return Error("expected identifier list after var");
  183. }
  184. // At this point, we have to have 'in'.
  185. if (CurTok != tok_in)
  186. return Error("expected 'in' keyword after 'var'");
  187. getNextToken(); // eat 'in'.
  188. auto Body = ParseExpression();
  189. if (!Body)
  190. return nullptr;
  191. return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
  192. }
  193. /// primary
  194. /// ::= identifierexpr
  195. /// ::= numberexpr
  196. /// ::= parenexpr
  197. static std::unique_ptr<ExprAST> ParsePrimary() {
  198. switch (CurTok) {
  199. default:
  200. return Error("unknown token when expecting an expression");
  201. case tok_identifier:
  202. return ParseIdentifierExpr();
  203. case tok_number:
  204. return ParseNumberExpr();
  205. case '(':
  206. return ParseParenExpr();
  207. case tok_if:
  208. return ParseIfExpr();
  209. case tok_for:
  210. return ParseForExpr();
  211. case tok_var:
  212. return ParseVarExpr();
  213. }
  214. }
  215. /// unary
  216. /// ::= primary
  217. /// ::= '!' unary
  218. static std::unique_ptr<ExprAST> ParseUnary() {
  219. // If the current token is not an operator, it must be a primary expr.
  220. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
  221. return ParsePrimary();
  222. // If this is a unary operator, read it.
  223. int Opc = CurTok;
  224. getNextToken();
  225. if (auto Operand = ParseUnary())
  226. return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
  227. return nullptr;
  228. }
  229. /// binoprhs
  230. /// ::= ('+' unary)*
  231. static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
  232. std::unique_ptr<ExprAST> LHS) {
  233. // If this is a binop, find its precedence.
  234. while (1) {
  235. int TokPrec = GetTokPrecedence();
  236. // If this is a binop that binds at least as tightly as the current binop,
  237. // consume it, otherwise we are done.
  238. if (TokPrec < ExprPrec)
  239. return LHS;
  240. // Okay, we know this is a binop.
  241. int BinOp = CurTok;
  242. getNextToken(); // eat binop
  243. // Parse the unary expression after the binary operator.
  244. auto RHS = ParseUnary();
  245. if (!RHS)
  246. return nullptr;
  247. // If BinOp binds less tightly with RHS than the operator after RHS, let
  248. // the pending operator take RHS as its LHS.
  249. int NextPrec = GetTokPrecedence();
  250. if (TokPrec < NextPrec) {
  251. RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
  252. if (!RHS)
  253. return nullptr;
  254. }
  255. // Merge LHS/RHS.
  256. LHS =
  257. llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
  258. }
  259. }
  260. /// expression
  261. /// ::= unary binoprhs
  262. ///
  263. static std::unique_ptr<ExprAST> ParseExpression() {
  264. auto LHS = ParseUnary();
  265. if (!LHS)
  266. return nullptr;
  267. return ParseBinOpRHS(0, std::move(LHS));
  268. }
  269. /// prototype
  270. /// ::= id '(' id* ')'
  271. /// ::= binary LETTER number? (id, id)
  272. /// ::= unary LETTER (id)
  273. static std::unique_ptr<PrototypeAST> ParsePrototype() {
  274. std::string FnName;
  275. unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
  276. unsigned BinaryPrecedence = 30;
  277. switch (CurTok) {
  278. default:
  279. return ErrorP("Expected function name in prototype");
  280. case tok_identifier:
  281. FnName = LexerObjects::IdentifierStr;
  282. Kind = 0;
  283. getNextToken();
  284. break;
  285. case tok_unary:
  286. getNextToken();
  287. if (!isascii(CurTok))
  288. return ErrorP("Expected unary operator");
  289. FnName = "unary";
  290. FnName += (char)CurTok;
  291. Kind = 1;
  292. getNextToken();
  293. break;
  294. case tok_binary:
  295. getNextToken();
  296. if (!isascii(CurTok))
  297. return ErrorP("Expected binary operator");
  298. FnName = "binary";
  299. FnName += (char)CurTok;
  300. Kind = 2;
  301. getNextToken();
  302. // Read the precedence if present.
  303. if (CurTok == tok_number) {
  304. if (LexerObjects::NumVal < 1 || LexerObjects::NumVal > 100)
  305. return ErrorP("Invalid precedecnce: must be 1..100");
  306. BinaryPrecedence = (unsigned)LexerObjects::NumVal;
  307. getNextToken();
  308. }
  309. break;
  310. }
  311. if (CurTok != '(')
  312. return ErrorP("Expected '(' in prototype");
  313. std::vector<std::string> ArgNames;
  314. while (getNextToken() == tok_identifier)
  315. ArgNames.push_back(LexerObjects::IdentifierStr);
  316. if (CurTok != ')')
  317. return ErrorP("Expected ')' in prototype");
  318. // success.
  319. getNextToken(); // eat ')'.
  320. // Verify right number of names for operator.
  321. if (Kind && ArgNames.size() != Kind)
  322. return ErrorP("Invalid number of operands for operator");
  323. return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
  324. BinaryPrecedence);
  325. }
  326. /// definition ::= 'def' prototype expression
  327. static std::unique_ptr<FunctionAST> ParseDefinition() {
  328. getNextToken(); // eat def.
  329. auto Proto = ParsePrototype();
  330. if (!Proto)
  331. return nullptr;
  332. if (auto E = ParseExpression())
  333. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  334. return nullptr;
  335. }
  336. /// toplevelexpr ::= expression
  337. static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
  338. if (auto E = ParseExpression()) {
  339. // Make an anonymous proto.
  340. auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
  341. std::vector<std::string>());
  342. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  343. }
  344. return nullptr;
  345. }
  346. /// external ::= 'extern' prototype
  347. static std::unique_ptr<PrototypeAST> ParseExtern() {
  348. getNextToken(); // eat extern.
  349. return ParsePrototype();
  350. }
  351. //===----------------------------------------------------------------------===//
  352. // Top-Level parsing and JIT Driver
  353. //===----------------------------------------------------------------------===//
  354. static void HandleDefinition() {
  355. if (auto FnAST = ParseDefinition()) {
  356. if (auto *FnIR = FnAST->codegen()) {
  357. fprintf(stderr, "Read function definition:");
  358. FnIR->dump();
  359. JITObjects::TheJIT->addModule(std::move(AstObjects::TheModule));
  360. InitializeModuleAndPassManager();
  361. }
  362. } else {
  363. // Skip token for error recovery.
  364. getNextToken();
  365. }
  366. }
  367. static void HandleExtern() {
  368. if (auto ProtoAST = ParseExtern()) {
  369. if (auto *FnIR = ProtoAST->codegen()) {
  370. fprintf(stderr, "Read extern: ");
  371. FnIR->dump();
  372. JITObjects::FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
  373. }
  374. } else {
  375. // Skip token for error recovery.
  376. getNextToken();
  377. }
  378. }
  379. static void HandleTopLevelExpression() {
  380. // Evaluate a top-level expression into an anonymous function.
  381. if (auto FnAST = ParseTopLevelExpr()) {
  382. if (FnAST->codegen()) {
  383. // JIT the module containing the anonymous expression, keeping a handle so
  384. // we can free it later.
  385. auto H = JITObjects::TheJIT->addModule(std::move(AstObjects::TheModule));
  386. InitializeModuleAndPassManager();
  387. // Search the JIT for the __anon_expr symbol.
  388. auto ExprSymbol = JITObjects::TheJIT->findSymbol("__anon_expr");
  389. assert(ExprSymbol && "Function not found");
  390. // Get the symbol's address and cast it to the right type (takes no
  391. // arguments, returns a double) so we can call it as a native function.
  392. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
  393. fprintf(stderr, "Evaluated to %f\n", FP());
  394. // Delete the anonymous expression module from the JIT.
  395. JITObjects::TheJIT->removeModule(H);
  396. }
  397. } else {
  398. // Skip token for error recovery.
  399. getNextToken();
  400. }
  401. }
  402. /// top ::= definition | external | expression | ';'
  403. void MainLoop() {
  404. // Install standard binary operators.
  405. // 1 is lowest precedence.
  406. BinopPrecedence['='] = 2;
  407. BinopPrecedence['<'] = 10;
  408. BinopPrecedence['+'] = 20;
  409. BinopPrecedence['-'] = 20;
  410. BinopPrecedence['*'] = 40; // highest.
  411. while (1) {
  412. fprintf(stderr, "ready> ");
  413. switch (CurTok) {
  414. case tok_eof:
  415. return;
  416. case ';': // ignore top-level semicolons.
  417. getNextToken();
  418. break;
  419. case tok_def:
  420. HandleDefinition();
  421. break;
  422. case tok_extern:
  423. HandleExtern();
  424. break;
  425. default:
  426. HandleTopLevelExpression();
  427. break;
  428. }
  429. }
  430. }
  431. }