Parser.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. // Standard includes
  2. #include <cassert>
  3. #include <cstdint>
  4. #include <cstdio>
  5. #include <stdexcept>
  6. //LLVM includes
  7. // Local includes
  8. #include "Parser.h"
  9. #include "Lexer.h"
  10. #include "JIT.h"
  11. using namespace lexer;
  12. using namespace jit;
  13. namespace parser{
  14. /// putchard - putchar that takes a double and returns 0.
  15. extern "C" double putchard(double X) {
  16. fputc((char)X, stderr);
  17. return 0;
  18. }
  19. /// printd - printf that takes a double prints it as "%f\n", returning 0.
  20. extern "C" double printd(double X) {
  21. fprintf(stderr, "%f\n", X);
  22. return 0;
  23. }
  24. //===----------------------------------------------------------------------===//
  25. // Parser
  26. //===----------------------------------------------------------------------===//
  27. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  28. /// token the parser is looking at. getNextToken reads another token from the
  29. /// lexer and updates CurTok with its results.
  30. int getNextToken() { return CurTok = gettok(); }
  31. /// BinopPrecedence - This holds the precedence for each binary operator that is
  32. /// defined.
  33. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  34. static int GetTokPrecedence() {
  35. if (!isascii(CurTok))
  36. return -1;
  37. // Make sure it's a declared binop.
  38. int TokPrec = BinopPrecedence[CurTok];
  39. if (TokPrec <= 0)
  40. return -1;
  41. return TokPrec;
  42. }
  43. /// Error* - These are little helper functions for error handling.
  44. std::unique_ptr<ExprAST> Error(const char *Str) {
  45. fprintf(stderr, "Error: %s\n", Str);
  46. return nullptr;
  47. }
  48. std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
  49. Error(Str);
  50. return nullptr;
  51. }
  52. static std::unique_ptr<ExprAST> ParseExpression();
  53. /// numberexpr ::= number
  54. static std::unique_ptr<ExprAST> ParseNumberExpr() {
  55. auto Result = llvm::make_unique<NumberExprAST>(LexerObjects::NumVal);
  56. getNextToken(); // consume the number
  57. return std::move(Result);
  58. }
  59. /// parenexpr ::= '(' expression ')'
  60. static std::unique_ptr<ExprAST> ParseParenExpr() {
  61. getNextToken(); // eat (.
  62. auto V = ParseExpression();
  63. if (!V)
  64. return nullptr;
  65. if (CurTok != ')')
  66. return Error("expected ')'");
  67. getNextToken(); // eat ).
  68. return V;
  69. }
  70. /// identifierexpr
  71. /// ::= identifier
  72. /// ::= identifier '(' expression* ')'
  73. static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
  74. std::string IdName = LexerObjects::IdentifierStr;
  75. getNextToken(); // eat identifier.
  76. if (CurTok != '(') // Simple variable ref.
  77. return llvm::make_unique<VariableExprAST>(IdName);
  78. // Call.
  79. getNextToken(); // eat (
  80. std::vector<std::unique_ptr<ExprAST>> Args;
  81. if (CurTok != ')') {
  82. while (1) {
  83. if (auto Arg = ParseExpression())
  84. Args.push_back(std::move(Arg));
  85. else
  86. return nullptr;
  87. if (CurTok == ')')
  88. break;
  89. if (CurTok != ',')
  90. return Error("Expected ')' or ',' in argument list");
  91. getNextToken();
  92. }
  93. }
  94. // Eat the ')'.
  95. getNextToken();
  96. return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
  97. }
  98. /// ifexpr ::= 'if' expression 'then' expression 'else' expression
  99. static std::unique_ptr<ExprAST> ParseIfExpr() {
  100. getNextToken(); // eat the if.
  101. // condition.
  102. auto Cond = ParseExpression();
  103. if (!Cond)
  104. return nullptr;
  105. if (CurTok != tok_then)
  106. return Error("expected then");
  107. getNextToken(); // eat the then
  108. auto Then = ParseExpression();
  109. if (!Then)
  110. return nullptr;
  111. if (CurTok != tok_else)
  112. return Error("expected else");
  113. getNextToken();
  114. auto Else = ParseExpression();
  115. if (!Else)
  116. return nullptr;
  117. return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
  118. std::move(Else));
  119. }
  120. /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
  121. static std::unique_ptr<ExprAST> ParseForExpr() {
  122. getNextToken(); // eat the for.
  123. if (CurTok != tok_identifier)
  124. return Error("expected identifier after for");
  125. std::string IdName = LexerObjects::IdentifierStr;
  126. getNextToken(); // eat identifier.
  127. if (CurTok != '=')
  128. return Error("expected '=' after for");
  129. getNextToken(); // eat '='.
  130. auto Start = ParseExpression();
  131. if (!Start)
  132. return nullptr;
  133. if (CurTok != ',')
  134. return Error("expected ',' after for start value");
  135. getNextToken();
  136. auto End = ParseExpression();
  137. if (!End)
  138. return nullptr;
  139. // The step value is optional.
  140. std::unique_ptr<ExprAST> Step;
  141. if (CurTok == ',') {
  142. getNextToken();
  143. Step = ParseExpression();
  144. if (!Step)
  145. return nullptr;
  146. }
  147. if (CurTok != tok_in)
  148. return Error("expected 'in' after for");
  149. getNextToken(); // eat 'in'.
  150. auto Body = ParseExpression();
  151. if (!Body)
  152. return nullptr;
  153. return llvm::make_unique<ForExprAST>(IdName, std::move(Start),
  154. std::move(End), std::move(Step),
  155. std::move(Body));
  156. }
  157. /// primary
  158. /// ::= identifierexpr
  159. /// ::= numberexpr
  160. /// ::= parenexpr
  161. static std::unique_ptr<ExprAST> ParsePrimary() {
  162. switch (CurTok) {
  163. default:
  164. return Error("unknown token when expecting an expression");
  165. case tok_identifier:
  166. return ParseIdentifierExpr();
  167. case tok_number:
  168. return ParseNumberExpr();
  169. case '(':
  170. return ParseParenExpr();
  171. case tok_if:
  172. return ParseIfExpr();
  173. case tok_for:
  174. return ParseForExpr();
  175. }
  176. }
  177. /// unary
  178. /// ::= primary
  179. /// ::= '!' unary
  180. static std::unique_ptr<ExprAST> ParseUnary() {
  181. // If the current token is not an operator, it must be a primary expr.
  182. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
  183. return ParsePrimary();
  184. // If this is a unary operator, read it.
  185. int Opc = CurTok;
  186. getNextToken();
  187. if (auto Operand = ParseUnary())
  188. return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
  189. return nullptr;
  190. }
  191. /// binoprhs
  192. /// ::= ('+' unary)*
  193. static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
  194. std::unique_ptr<ExprAST> LHS) {
  195. // If this is a binop, find its precedence.
  196. while (1) {
  197. int TokPrec = GetTokPrecedence();
  198. // If this is a binop that binds at least as tightly as the current binop,
  199. // consume it, otherwise we are done.
  200. if (TokPrec < ExprPrec)
  201. return LHS;
  202. // Okay, we know this is a binop.
  203. int BinOp = CurTok;
  204. getNextToken(); // eat binop
  205. // Parse the unary expression after the binary operator.
  206. auto RHS = ParseUnary();
  207. if (!RHS)
  208. return nullptr;
  209. // If BinOp binds less tightly with RHS than the operator after RHS, let
  210. // the pending operator take RHS as its LHS.
  211. int NextPrec = GetTokPrecedence();
  212. if (TokPrec < NextPrec) {
  213. RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
  214. if (!RHS)
  215. return nullptr;
  216. }
  217. // Merge LHS/RHS.
  218. LHS =
  219. llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
  220. }
  221. }
  222. /// expression
  223. /// ::= unary binoprhs
  224. ///
  225. static std::unique_ptr<ExprAST> ParseExpression() {
  226. auto LHS = ParseUnary();
  227. if (!LHS)
  228. return nullptr;
  229. return ParseBinOpRHS(0, std::move(LHS));
  230. }
  231. /// prototype
  232. /// ::= id '(' id* ')'
  233. /// ::= binary LETTER number? (id, id)
  234. /// ::= unary LETTER (id)
  235. static std::unique_ptr<PrototypeAST> ParsePrototype() {
  236. std::string FnName;
  237. unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
  238. unsigned BinaryPrecedence = 30;
  239. switch (CurTok) {
  240. default:
  241. return ErrorP("Expected function name in prototype");
  242. case tok_identifier:
  243. FnName = LexerObjects::IdentifierStr;
  244. Kind = 0;
  245. getNextToken();
  246. break;
  247. case tok_unary:
  248. getNextToken();
  249. if (!isascii(CurTok))
  250. return ErrorP("Expected unary operator");
  251. FnName = "unary";
  252. FnName += (char)CurTok;
  253. Kind = 1;
  254. getNextToken();
  255. break;
  256. case tok_binary:
  257. getNextToken();
  258. if (!isascii(CurTok))
  259. return ErrorP("Expected binary operator");
  260. FnName = "binary";
  261. FnName += (char)CurTok;
  262. Kind = 2;
  263. getNextToken();
  264. // Read the precedence if present.
  265. if (CurTok == tok_number) {
  266. if (LexerObjects::NumVal < 1 || LexerObjects::NumVal > 100)
  267. return ErrorP("Invalid precedecnce: must be 1..100");
  268. BinaryPrecedence = (unsigned)LexerObjects::NumVal;
  269. getNextToken();
  270. }
  271. break;
  272. }
  273. if (CurTok != '(')
  274. return ErrorP("Expected '(' in prototype");
  275. std::vector<std::string> ArgNames;
  276. while (getNextToken() == tok_identifier)
  277. ArgNames.push_back(LexerObjects::IdentifierStr);
  278. if (CurTok != ')')
  279. return ErrorP("Expected ')' in prototype");
  280. // success.
  281. getNextToken(); // eat ')'.
  282. // Verify right number of names for operator.
  283. if (Kind && ArgNames.size() != Kind)
  284. return ErrorP("Invalid number of operands for operator");
  285. return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
  286. BinaryPrecedence);
  287. }
  288. /// definition ::= 'def' prototype expression
  289. static std::unique_ptr<FunctionAST> ParseDefinition() {
  290. getNextToken(); // eat def.
  291. auto Proto = ParsePrototype();
  292. if (!Proto)
  293. return nullptr;
  294. if (auto E = ParseExpression())
  295. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  296. return nullptr;
  297. }
  298. /// toplevelexpr ::= expression
  299. static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
  300. if (auto E = ParseExpression()) {
  301. // Make an anonymous proto.
  302. auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
  303. std::vector<std::string>());
  304. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  305. }
  306. return nullptr;
  307. }
  308. /// external ::= 'extern' prototype
  309. static std::unique_ptr<PrototypeAST> ParseExtern() {
  310. getNextToken(); // eat extern.
  311. return ParsePrototype();
  312. }
  313. //===----------------------------------------------------------------------===//
  314. // Top-Level parsing and JIT Driver
  315. //===----------------------------------------------------------------------===//
  316. static void HandleDefinition() {
  317. if (auto FnAST = ParseDefinition()) {
  318. if (auto *FnIR = FnAST->codegen()) {
  319. fprintf(stderr, "Read function definition:");
  320. FnIR->dump();
  321. JITObjects::TheJIT->addModule(std::move(AstObjects::TheModule));
  322. InitializeModuleAndPassManager();
  323. }
  324. } else {
  325. // Skip token for error recovery.
  326. getNextToken();
  327. }
  328. }
  329. static void HandleExtern() {
  330. if (auto ProtoAST = ParseExtern()) {
  331. if (auto *FnIR = ProtoAST->codegen()) {
  332. fprintf(stderr, "Read extern: ");
  333. FnIR->dump();
  334. JITObjects::FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
  335. }
  336. } else {
  337. // Skip token for error recovery.
  338. getNextToken();
  339. }
  340. }
  341. static void HandleTopLevelExpression() {
  342. // Evaluate a top-level expression into an anonymous function.
  343. if (auto FnAST = ParseTopLevelExpr()) {
  344. if (FnAST->codegen()) {
  345. // JIT the module containing the anonymous expression, keeping a handle so
  346. // we can free it later.
  347. auto H = JITObjects::TheJIT->addModule(std::move(AstObjects::TheModule));
  348. InitializeModuleAndPassManager();
  349. // Search the JIT for the __anon_expr symbol.
  350. auto ExprSymbol = JITObjects::TheJIT->findSymbol("__anon_expr");
  351. assert(ExprSymbol && "Function not found");
  352. // Get the symbol's address and cast it to the right type (takes no
  353. // arguments, returns a double) so we can call it as a native function.
  354. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
  355. fprintf(stderr, "Evaluated to %f\n", FP());
  356. // Delete the anonymous expression module from the JIT.
  357. JITObjects::TheJIT->removeModule(H);
  358. }
  359. } else {
  360. // Skip token for error recovery.
  361. getNextToken();
  362. }
  363. }
  364. /// top ::= definition | external | expression | ';'
  365. void MainLoop() {
  366. // Install standard binary operators.
  367. // 1 is lowest precedence.
  368. BinopPrecedence['<'] = 10;
  369. BinopPrecedence['+'] = 20;
  370. BinopPrecedence['-'] = 20;
  371. BinopPrecedence['*'] = 40; // highest.
  372. while (1) {
  373. fprintf(stderr, "ready> ");
  374. switch (CurTok) {
  375. case tok_eof:
  376. return;
  377. case ';': // ignore top-level semicolons.
  378. getNextToken();
  379. break;
  380. case tok_def:
  381. HandleDefinition();
  382. break;
  383. case tok_extern:
  384. HandleExtern();
  385. break;
  386. default:
  387. HandleTopLevelExpression();
  388. break;
  389. }
  390. }
  391. }
  392. }