Parser.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. // Standard includes
  2. #include <cassert>
  3. #include <cstdint>
  4. #include <cstdio>
  5. #include <stdexcept>
  6. //LLVM includes
  7. // Local includes
  8. #include "Parser.h"
  9. #include "Lexer.h"
  10. #include "JIT.h"
  11. using namespace lexer;
  12. using namespace jit;
  13. namespace parser {
  /// putchard - Emit the character whose code is X (truncated to char) on
  /// stderr. Always returns 0.
  extern "C" double putchard(double X) {
    const char Ch = static_cast<char>(X);
    std::fputc(Ch, stderr);
    return 0.0;
  }
  /// printd - Write X to stderr formatted as "%f\n". Always returns 0.
  extern "C" double printd(double X) {
    std::fprintf(stderr, "%f\n", X);
    return 0.0;
  }
  24. //===----------------------------------------------------------------------===//
  25. // Parser
  26. //===----------------------------------------------------------------------===//
  27. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  28. /// token the parser is looking at. getNextToken reads another token from the
  29. /// lexer and updates CurTok with its results.
  30. int getNextToken() { return CurTok = gettok(); }
  31. /// BinopPrecedence - This holds the precedence for each binary operator that is
  32. /// defined.
  33. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  34. static int GetTokPrecedence() {
  35. if (!isascii(CurTok))
  36. return -1;
  37. // Make sure it's a declared binop.
  38. int TokPrec = BinopPrecedence[CurTok];
  39. if (TokPrec <= 0)
  40. return -1;
  41. return TokPrec;
  42. }
  43. /// Error* - These are little helper functions for error handling.
  44. std::unique_ptr<ExprAST> Error(const char *Str) {
  45. fprintf(stderr, "Error: %s\n", Str);
  46. return nullptr;
  47. }
  48. std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
  49. Error(Str);
  50. return nullptr;
  51. }
  52. static std::unique_ptr<ExprAST> ParseExpression();
  53. /// numberexpr ::= number
  54. static std::unique_ptr<ExprAST> ParseNumberExpr() {
  55. auto Result = llvm::make_unique<NumberExprAST>(LexerObjects::NumVal);
  56. getNextToken(); // consume the number
  57. return std::move(Result);
  58. }
  59. /// parenexpr ::= '(' expression ')'
  60. static std::unique_ptr<ExprAST> ParseParenExpr() {
  61. getNextToken(); // eat (.
  62. auto V = ParseExpression();
  63. if (!V)
  64. return nullptr;
  65. if (CurTok != ')')
  66. return Error("expected ')'");
  67. getNextToken(); // eat ).
  68. return V;
  69. }
  70. /// identifierexpr
  71. /// ::= identifier
  72. /// ::= identifier '(' expression* ')'
  73. static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
  74. std::string IdName = LexerObjects::IdentifierStr;
  75. SourceLocation LitLoc = CurLoc;
  76. getNextToken(); // eat identifier.
  77. if (CurTok != '(') // Simple variable ref.
  78. return llvm::make_unique<VariableExprAST>(LitLoc, IdName);
  79. // Call.
  80. getNextToken(); // eat (
  81. std::vector<std::unique_ptr<ExprAST>> Args;
  82. if (CurTok != ')') {
  83. while (1) {
  84. if (auto Arg = ParseExpression())
  85. Args.push_back(std::move(Arg));
  86. else
  87. return nullptr;
  88. if (CurTok == ')')
  89. break;
  90. if (CurTok != ',')
  91. return Error("Expected ')' or ',' in argument list");
  92. getNextToken();
  93. }
  94. }
  95. // Eat the ')'.
  96. getNextToken();
  97. return llvm::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args));
  98. }
  99. /// ifexpr ::= 'if' expression 'then' expression 'else' expression
  100. static std::unique_ptr<ExprAST> ParseIfExpr() {
  101. SourceLocation IfLoc = CurLoc;
  102. getNextToken(); // eat the if.
  103. // condition.
  104. auto Cond = ParseExpression();
  105. if (!Cond)
  106. return nullptr;
  107. if (CurTok != tok_then)
  108. return Error("expected then");
  109. getNextToken(); // eat the then
  110. auto Then = ParseExpression();
  111. if (!Then)
  112. return nullptr;
  113. if (CurTok != tok_else)
  114. return Error("expected else");
  115. getNextToken();
  116. auto Else = ParseExpression();
  117. if (!Else)
  118. return nullptr;
  119. return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
  120. std::move(Else));
  121. }
  122. /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
  123. static std::unique_ptr<ExprAST> ParseForExpr() {
  124. getNextToken(); // eat the for.
  125. if (CurTok != tok_identifier)
  126. return Error("expected identifier after for");
  127. std::string IdName = LexerObjects::IdentifierStr;
  128. getNextToken(); // eat identifier.
  129. if (CurTok != '=')
  130. return Error("expected '=' after for");
  131. getNextToken(); // eat '='.
  132. auto Start = ParseExpression();
  133. if (!Start)
  134. return nullptr;
  135. if (CurTok != ',')
  136. return Error("expected ',' after for start value");
  137. getNextToken();
  138. auto End = ParseExpression();
  139. if (!End)
  140. return nullptr;
  141. // The step value is optional.
  142. std::unique_ptr<ExprAST> Step;
  143. if (CurTok == ',') {
  144. getNextToken();
  145. Step = ParseExpression();
  146. if (!Step)
  147. return nullptr;
  148. }
  149. if (CurTok != tok_in)
  150. return Error("expected 'in' after for");
  151. getNextToken(); // eat 'in'.
  152. auto Body = ParseExpression();
  153. if (!Body)
  154. return nullptr;
  155. return llvm::make_unique<ForExprAST>(IdName, std::move(Start),
  156. std::move(End), std::move(Step),
  157. std::move(Body));
  158. }
  159. /// varexpr ::= 'var' identifier ('=' expression)?
  160. // (',' identifier ('=' expression)?)* 'in' expression
  161. static std::unique_ptr<ExprAST> ParseVarExpr() {
  162. getNextToken(); // eat the var.
  163. std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
  164. // At least one variable name is required.
  165. if (CurTok != tok_identifier)
  166. return Error("expected identifier after var");
  167. while (1) {
  168. std::string Name = LexerObjects::IdentifierStr;
  169. getNextToken(); // eat identifier.
  170. // Read the optional initializer.
  171. std::unique_ptr<ExprAST> Init = nullptr;
  172. if (CurTok == '=') {
  173. getNextToken(); // eat the '='.
  174. Init = ParseExpression();
  175. if (!Init)
  176. return nullptr;
  177. }
  178. VarNames.push_back(std::make_pair(Name, std::move(Init)));
  179. // End of var list, exit loop.
  180. if (CurTok != ',')
  181. break;
  182. getNextToken(); // eat the ','.
  183. if (CurTok != tok_identifier)
  184. return Error("expected identifier list after var");
  185. }
  186. // At this point, we have to have 'in'.
  187. if (CurTok != tok_in)
  188. return Error("expected 'in' keyword after 'var'");
  189. getNextToken(); // eat 'in'.
  190. auto Body = ParseExpression();
  191. if (!Body)
  192. return nullptr;
  193. return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
  194. }
  195. /// primary
  196. /// ::= identifierexpr
  197. /// ::= numberexpr
  198. /// ::= parenexpr
  199. static std::unique_ptr<ExprAST> ParsePrimary() {
  200. switch (CurTok) {
  201. default:
  202. return Error("unknown token when expecting an expression");
  203. case tok_identifier:
  204. return ParseIdentifierExpr();
  205. case tok_number:
  206. return ParseNumberExpr();
  207. case '(':
  208. return ParseParenExpr();
  209. case tok_if:
  210. return ParseIfExpr();
  211. case tok_for:
  212. return ParseForExpr();
  213. case tok_var:
  214. return ParseVarExpr();
  215. }
  216. }
  217. /// unary
  218. /// ::= primary
  219. /// ::= '!' unary
  220. static std::unique_ptr<ExprAST> ParseUnary() {
  221. // If the current token is not an operator, it must be a primary expr.
  222. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
  223. return ParsePrimary();
  224. // If this is a unary operator, read it.
  225. int Opc = CurTok;
  226. getNextToken();
  227. if (auto Operand = ParseUnary())
  228. return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
  229. return nullptr;
  230. }
  231. /// binoprhs
  232. /// ::= ('+' unary)*
  233. static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
  234. std::unique_ptr<ExprAST> LHS) {
  235. // If this is a binop, find its precedence.
  236. while (1) {
  237. int TokPrec = GetTokPrecedence();
  238. // If this is a binop that binds at least as tightly as the current binop,
  239. // consume it, otherwise we are done.
  240. if (TokPrec < ExprPrec)
  241. return LHS;
  242. // Okay, we know this is a binop.
  243. int BinOp = CurTok;
  244. SourceLocation BinLoc = CurLoc;
  245. getNextToken(); // eat binop
  246. // Parse the unary expression after the binary operator.
  247. auto RHS = ParseUnary();
  248. if (!RHS)
  249. return nullptr;
  250. // If BinOp binds less tightly with RHS than the operator after RHS, let
  251. // the pending operator take RHS as its LHS.
  252. int NextPrec = GetTokPrecedence();
  253. if (TokPrec < NextPrec) {
  254. RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
  255. if (!RHS)
  256. return nullptr;
  257. }
  258. // Merge LHS/RHS.
  259. LHS = llvm::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
  260. std::move(RHS));
  261. }
  262. }
  263. /// expression
  264. /// ::= unary binoprhs
  265. ///
  266. static std::unique_ptr<ExprAST> ParseExpression() {
  267. auto LHS = ParseUnary();
  268. if (!LHS)
  269. return nullptr;
  270. return ParseBinOpRHS(0, std::move(LHS));
  271. }
  272. /// prototype
  273. /// ::= id '(' id* ')'
  274. /// ::= binary LETTER number? (id, id)
  275. /// ::= unary LETTER (id)
  276. static std::unique_ptr<PrototypeAST> ParsePrototype() {
  277. std::string FnName;
  278. SourceLocation FnLoc = CurLoc;
  279. unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
  280. unsigned BinaryPrecedence = 30;
  281. switch (CurTok) {
  282. default:
  283. return ErrorP("Expected function name in prototype");
  284. case tok_identifier:
  285. FnName = LexerObjects::IdentifierStr;
  286. Kind = 0;
  287. getNextToken();
  288. break;
  289. case tok_unary:
  290. getNextToken();
  291. if (!isascii(CurTok))
  292. return ErrorP("Expected unary operator");
  293. FnName = "unary";
  294. FnName += (char)CurTok;
  295. Kind = 1;
  296. getNextToken();
  297. break;
  298. case tok_binary:
  299. getNextToken();
  300. if (!isascii(CurTok))
  301. return ErrorP("Expected binary operator");
  302. FnName = "binary";
  303. FnName += (char)CurTok;
  304. Kind = 2;
  305. getNextToken();
  306. // Read the precedence if present.
  307. if (CurTok == tok_number) {
  308. if (LexerObjects::NumVal < 1 || LexerObjects::NumVal > 100)
  309. return ErrorP("Invalid precedecnce: must be 1..100");
  310. BinaryPrecedence = (unsigned)LexerObjects::NumVal;
  311. getNextToken();
  312. }
  313. break;
  314. }
  315. if (CurTok != '(')
  316. return ErrorP("Expected '(' in prototype");
  317. std::vector<std::string> ArgNames;
  318. while (getNextToken() == tok_identifier)
  319. ArgNames.push_back(LexerObjects::IdentifierStr);
  320. if (CurTok != ')')
  321. return ErrorP("Expected ')' in prototype");
  322. // success.
  323. getNextToken(); // eat ')'.
  324. // Verify right number of names for operator.
  325. if (Kind && ArgNames.size() != Kind)
  326. return ErrorP("Invalid number of operands for operator");
  327. return llvm::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, Kind != 0,
  328. BinaryPrecedence);
  329. }
  330. /// definition ::= 'def' prototype expression
  331. static std::unique_ptr<FunctionAST> ParseDefinition() {
  332. getNextToken(); // eat def.
  333. auto Proto = ParsePrototype();
  334. if (!Proto)
  335. return nullptr;
  336. if (auto E = ParseExpression())
  337. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  338. return nullptr;
  339. }
  340. /// toplevelexpr ::= expression
  341. static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
  342. SourceLocation FnLoc = CurLoc;
  343. if (auto E = ParseExpression()) {
  344. // Make an anonymous proto.
  345. auto Proto = llvm::make_unique<PrototypeAST>(FnLoc, "__anon_expr",
  346. std::vector<std::string>());
  347. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  348. }
  349. return nullptr;
  350. }
  351. /// external ::= 'extern' prototype
  352. static std::unique_ptr<PrototypeAST> ParseExtern() {
  353. getNextToken(); // eat extern.
  354. return ParsePrototype();
  355. }
  356. //===----------------------------------------------------------------------===//
  357. // Top-Level parsing and JIT Driver
  358. //===----------------------------------------------------------------------===//
  359. static void HandleDefinition() {
  360. if (auto FnAST = ParseDefinition()) {
  361. if (!FnAST->codegen())
  362. fprintf(stderr, "Error reading function definition:");
  363. } else {
  364. // Skip token for error recovery.
  365. getNextToken();
  366. }
  367. }
  368. static void HandleExtern() {
  369. if (auto ProtoAST = ParseExtern()) {
  370. if (!ProtoAST->codegen())
  371. fprintf(stderr, "Error reading extern");
  372. else
  373. FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
  374. } else {
  375. // Skip token for error recovery.
  376. getNextToken();
  377. }
  378. }
  379. static void HandleTopLevelExpression() {
  380. // Evaluate a top-level expression into an anonymous function.
  381. if (auto FnAST = ParseTopLevelExpr()) {
  382. if (!FnAST->codegen()) {
  383. fprintf(stderr, "Error generating code for top level expr");
  384. }
  385. } else {
  386. // Skip token for error recovery.
  387. getNextToken();
  388. }
  389. }
  390. /// top ::= definition | external | expression | ';'
  391. void MainLoop() {
  392. // Install standard binary operators.
  393. // 1 is lowest precedence.
  394. BinopPrecedence['='] = 2;
  395. BinopPrecedence['<'] = 10;
  396. BinopPrecedence['+'] = 20;
  397. BinopPrecedence['-'] = 20;
  398. BinopPrecedence['*'] = 40; // highest.
  399. while (1) {
  400. switch (CurTok) {
  401. case tok_eof:
  402. return;
  403. case ';': // ignore top-level semicolons.
  404. getNextToken();
  405. break;
  406. case tok_def:
  407. HandleDefinition();
  408. break;
  409. case tok_extern:
  410. HandleExtern();
  411. break;
  412. default:
  413. HandleTopLevelExpression();
  414. break;
  415. }
  416. }
  417. }
  418. }