Chapter7.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230
  1. #include "llvm/ADT/STLExtras.h"
  2. #include "llvm/Analysis/Passes.h"
  3. #include "llvm/IR/IRBuilder.h"
  4. #include "llvm/IR/LLVMContext.h"
  5. #include "llvm/IR/LegacyPassManager.h"
  6. #include "llvm/IR/Module.h"
  7. #include "llvm/IR/Verifier.h"
  8. #include "llvm/Support/TargetSelect.h"
  9. #include "llvm/Transforms/Scalar.h"
  10. #include <cctype>
  11. #include <cstdio>
  12. #include <map>
  13. #include <string>
  14. #include <vector>
  15. #include "../KaleidoscopeJIT.h"
  16. using namespace llvm;
  17. using namespace llvm::orc;
  18. //===----------------------------------------------------------------------===//
  19. // Lexer
  20. //===----------------------------------------------------------------------===//
  21. // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
  22. // of these for known things.
  23. enum Token {
  24. tok_eof = -1,
  25. // commands
  26. tok_def = -2,
  27. tok_extern = -3,
  28. // primary
  29. tok_identifier = -4,
  30. tok_number = -5,
  31. // control
  32. tok_if = -6,
  33. tok_then = -7,
  34. tok_else = -8,
  35. tok_for = -9,
  36. tok_in = -10,
  37. // operators
  38. tok_binary = -11,
  39. tok_unary = -12,
  40. // var definition
  41. tok_var = -13
  42. };
  43. static std::string IdentifierStr; // Filled in if tok_identifier
  44. static double NumVal; // Filled in if tok_number
  45. /// gettok - Return the next token from standard input.
  46. static int gettok() {
  47. static int LastChar = ' ';
  48. // Skip any whitespace.
  49. while (isspace(LastChar))
  50. LastChar = getchar();
  51. if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
  52. IdentifierStr = LastChar;
  53. while (isalnum((LastChar = getchar())))
  54. IdentifierStr += LastChar;
  55. if (IdentifierStr == "def")
  56. return tok_def;
  57. if (IdentifierStr == "extern")
  58. return tok_extern;
  59. if (IdentifierStr == "if")
  60. return tok_if;
  61. if (IdentifierStr == "then")
  62. return tok_then;
  63. if (IdentifierStr == "else")
  64. return tok_else;
  65. if (IdentifierStr == "for")
  66. return tok_for;
  67. if (IdentifierStr == "in")
  68. return tok_in;
  69. if (IdentifierStr == "binary")
  70. return tok_binary;
  71. if (IdentifierStr == "unary")
  72. return tok_unary;
  73. if (IdentifierStr == "var")
  74. return tok_var;
  75. return tok_identifier;
  76. }
  77. if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
  78. std::string NumStr;
  79. do {
  80. NumStr += LastChar;
  81. LastChar = getchar();
  82. } while (isdigit(LastChar) || LastChar == '.');
  83. NumVal = strtod(NumStr.c_str(), nullptr);
  84. return tok_number;
  85. }
  86. if (LastChar == '#') {
  87. // Comment until end of line.
  88. do
  89. LastChar = getchar();
  90. while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
  91. if (LastChar != EOF)
  92. return gettok();
  93. }
  94. // Check for end of file. Don't eat the EOF.
  95. if (LastChar == EOF)
  96. return tok_eof;
  97. // Otherwise, just return the character as its ascii value.
  98. int ThisChar = LastChar;
  99. LastChar = getchar();
  100. return ThisChar;
  101. }
  102. //===----------------------------------------------------------------------===//
  103. // Abstract Syntax Tree (aka Parse Tree)
  104. //===----------------------------------------------------------------------===//
  105. namespace {
  106. /// ExprAST - Base class for all expression nodes.
  107. class ExprAST {
  108. public:
  109. virtual ~ExprAST() {}
  110. virtual Value *codegen() = 0;
  111. };
  112. /// NumberExprAST - Expression class for numeric literals like "1.0".
  113. class NumberExprAST : public ExprAST {
  114. double Val;
  115. public:
  116. NumberExprAST(double Val) : Val(Val) {}
  117. Value *codegen() override;
  118. };
  119. /// VariableExprAST - Expression class for referencing a variable, like "a".
  120. class VariableExprAST : public ExprAST {
  121. std::string Name;
  122. public:
  123. VariableExprAST(const std::string &Name) : Name(Name) {}
  124. const std::string &getName() const { return Name; }
  125. Value *codegen() override;
  126. };
  127. /// UnaryExprAST - Expression class for a unary operator.
  128. class UnaryExprAST : public ExprAST {
  129. char Opcode;
  130. std::unique_ptr<ExprAST> Operand;
  131. public:
  132. UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
  133. : Opcode(Opcode), Operand(std::move(Operand)) {}
  134. Value *codegen() override;
  135. };
  136. /// BinaryExprAST - Expression class for a binary operator.
  137. class BinaryExprAST : public ExprAST {
  138. char Op;
  139. std::unique_ptr<ExprAST> LHS, RHS;
  140. public:
  141. BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
  142. std::unique_ptr<ExprAST> RHS)
  143. : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
  144. Value *codegen() override;
  145. };
  146. /// CallExprAST - Expression class for function calls.
  147. class CallExprAST : public ExprAST {
  148. std::string Callee;
  149. std::vector<std::unique_ptr<ExprAST>> Args;
  150. public:
  151. CallExprAST(const std::string &Callee,
  152. std::vector<std::unique_ptr<ExprAST>> Args)
  153. : Callee(Callee), Args(std::move(Args)) {}
  154. Value *codegen() override;
  155. };
  156. /// IfExprAST - Expression class for if/then/else.
  157. class IfExprAST : public ExprAST {
  158. std::unique_ptr<ExprAST> Cond, Then, Else;
  159. public:
  160. IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
  161. std::unique_ptr<ExprAST> Else)
  162. : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
  163. Value *codegen() override;
  164. };
  165. /// ForExprAST - Expression class for for/in.
  166. class ForExprAST : public ExprAST {
  167. std::string VarName;
  168. std::unique_ptr<ExprAST> Start, End, Step, Body;
  169. public:
  170. ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
  171. std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
  172. std::unique_ptr<ExprAST> Body)
  173. : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
  174. Step(std::move(Step)), Body(std::move(Body)) {}
  175. Value *codegen() override;
  176. };
  177. /// VarExprAST - Expression class for var/in
  178. class VarExprAST : public ExprAST {
  179. std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
  180. std::unique_ptr<ExprAST> Body;
  181. public:
  182. VarExprAST(
  183. std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
  184. std::unique_ptr<ExprAST> Body)
  185. : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
  186. Value *codegen() override;
  187. };
  188. /// PrototypeAST - This class represents the "prototype" for a function,
  189. /// which captures its name, and its argument names (thus implicitly the number
  190. /// of arguments the function takes), as well as if it is an operator.
  191. class PrototypeAST {
  192. std::string Name;
  193. std::vector<std::string> Args;
  194. bool IsOperator;
  195. unsigned Precedence; // Precedence if a binary op.
  196. public:
  197. PrototypeAST(const std::string &Name, std::vector<std::string> Args,
  198. bool IsOperator = false, unsigned Prec = 0)
  199. : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
  200. Precedence(Prec) {}
  201. Function *codegen();
  202. const std::string &getName() const { return Name; }
  203. bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
  204. bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
  205. char getOperatorName() const {
  206. assert(isUnaryOp() || isBinaryOp());
  207. return Name[Name.size() - 1];
  208. }
  209. unsigned getBinaryPrecedence() const { return Precedence; }
  210. };
  211. /// FunctionAST - This class represents a function definition itself.
  212. class FunctionAST {
  213. std::unique_ptr<PrototypeAST> Proto;
  214. std::unique_ptr<ExprAST> Body;
  215. public:
  216. FunctionAST(std::unique_ptr<PrototypeAST> Proto,
  217. std::unique_ptr<ExprAST> Body)
  218. : Proto(std::move(Proto)), Body(std::move(Body)) {}
  219. Function *codegen();
  220. };
  221. } // end anonymous namespace
  222. //===----------------------------------------------------------------------===//
  223. // Parser
  224. //===----------------------------------------------------------------------===//
  225. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  226. /// token the parser is looking at. getNextToken reads another token from the
  227. /// lexer and updates CurTok with its results.
  228. static int CurTok;
  229. static int getNextToken() { return CurTok = gettok(); }
  230. /// BinopPrecedence - This holds the precedence for each binary operator that is
  231. /// defined.
  232. static std::map<char, int> BinopPrecedence;
  233. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  234. static int GetTokPrecedence() {
  235. if (!isascii(CurTok))
  236. return -1;
  237. // Make sure it's a declared binop.
  238. int TokPrec = BinopPrecedence[CurTok];
  239. if (TokPrec <= 0)
  240. return -1;
  241. return TokPrec;
  242. }
  243. /// Error* - These are little helper functions for error handling.
  244. std::unique_ptr<ExprAST> Error(const char *Str) {
  245. fprintf(stderr, "Error: %s\n", Str);
  246. return nullptr;
  247. }
  248. std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
  249. Error(Str);
  250. return nullptr;
  251. }
  252. static std::unique_ptr<ExprAST> ParseExpression();
  253. /// numberexpr ::= number
  254. static std::unique_ptr<ExprAST> ParseNumberExpr() {
  255. auto Result = llvm::make_unique<NumberExprAST>(NumVal);
  256. getNextToken(); // consume the number
  257. return std::move(Result);
  258. }
  259. /// parenexpr ::= '(' expression ')'
  260. static std::unique_ptr<ExprAST> ParseParenExpr() {
  261. getNextToken(); // eat (.
  262. auto V = ParseExpression();
  263. if (!V)
  264. return nullptr;
  265. if (CurTok != ')')
  266. return Error("expected ')'");
  267. getNextToken(); // eat ).
  268. return V;
  269. }
  270. /// identifierexpr
  271. /// ::= identifier
  272. /// ::= identifier '(' expression* ')'
  273. static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
  274. std::string IdName = IdentifierStr;
  275. getNextToken(); // eat identifier.
  276. if (CurTok != '(') // Simple variable ref.
  277. return llvm::make_unique<VariableExprAST>(IdName);
  278. // Call.
  279. getNextToken(); // eat (
  280. std::vector<std::unique_ptr<ExprAST>> Args;
  281. if (CurTok != ')') {
  282. while (1) {
  283. if (auto Arg = ParseExpression())
  284. Args.push_back(std::move(Arg));
  285. else
  286. return nullptr;
  287. if (CurTok == ')')
  288. break;
  289. if (CurTok != ',')
  290. return Error("Expected ')' or ',' in argument list");
  291. getNextToken();
  292. }
  293. }
  294. // Eat the ')'.
  295. getNextToken();
  296. return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
  297. }
  298. /// ifexpr ::= 'if' expression 'then' expression 'else' expression
  299. static std::unique_ptr<ExprAST> ParseIfExpr() {
  300. getNextToken(); // eat the if.
  301. // condition.
  302. auto Cond = ParseExpression();
  303. if (!Cond)
  304. return nullptr;
  305. if (CurTok != tok_then)
  306. return Error("expected then");
  307. getNextToken(); // eat the then
  308. auto Then = ParseExpression();
  309. if (!Then)
  310. return nullptr;
  311. if (CurTok != tok_else)
  312. return Error("expected else");
  313. getNextToken();
  314. auto Else = ParseExpression();
  315. if (!Else)
  316. return nullptr;
  317. return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
  318. std::move(Else));
  319. }
  320. /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
  321. static std::unique_ptr<ExprAST> ParseForExpr() {
  322. getNextToken(); // eat the for.
  323. if (CurTok != tok_identifier)
  324. return Error("expected identifier after for");
  325. std::string IdName = IdentifierStr;
  326. getNextToken(); // eat identifier.
  327. if (CurTok != '=')
  328. return Error("expected '=' after for");
  329. getNextToken(); // eat '='.
  330. auto Start = ParseExpression();
  331. if (!Start)
  332. return nullptr;
  333. if (CurTok != ',')
  334. return Error("expected ',' after for start value");
  335. getNextToken();
  336. auto End = ParseExpression();
  337. if (!End)
  338. return nullptr;
  339. // The step value is optional.
  340. std::unique_ptr<ExprAST> Step;
  341. if (CurTok == ',') {
  342. getNextToken();
  343. Step = ParseExpression();
  344. if (!Step)
  345. return nullptr;
  346. }
  347. if (CurTok != tok_in)
  348. return Error("expected 'in' after for");
  349. getNextToken(); // eat 'in'.
  350. auto Body = ParseExpression();
  351. if (!Body)
  352. return nullptr;
  353. return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
  354. std::move(Step), std::move(Body));
  355. }
  356. /// varexpr ::= 'var' identifier ('=' expression)?
  357. // (',' identifier ('=' expression)?)* 'in' expression
  358. static std::unique_ptr<ExprAST> ParseVarExpr() {
  359. getNextToken(); // eat the var.
  360. std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
  361. // At least one variable name is required.
  362. if (CurTok != tok_identifier)
  363. return Error("expected identifier after var");
  364. while (1) {
  365. std::string Name = IdentifierStr;
  366. getNextToken(); // eat identifier.
  367. // Read the optional initializer.
  368. std::unique_ptr<ExprAST> Init = nullptr;
  369. if (CurTok == '=') {
  370. getNextToken(); // eat the '='.
  371. Init = ParseExpression();
  372. if (!Init)
  373. return nullptr;
  374. }
  375. VarNames.push_back(std::make_pair(Name, std::move(Init)));
  376. // End of var list, exit loop.
  377. if (CurTok != ',')
  378. break;
  379. getNextToken(); // eat the ','.
  380. if (CurTok != tok_identifier)
  381. return Error("expected identifier list after var");
  382. }
  383. // At this point, we have to have 'in'.
  384. if (CurTok != tok_in)
  385. return Error("expected 'in' keyword after 'var'");
  386. getNextToken(); // eat 'in'.
  387. auto Body = ParseExpression();
  388. if (!Body)
  389. return nullptr;
  390. return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
  391. }
  392. /// primary
  393. /// ::= identifierexpr
  394. /// ::= numberexpr
  395. /// ::= parenexpr
  396. /// ::= ifexpr
  397. /// ::= forexpr
  398. /// ::= varexpr
  399. static std::unique_ptr<ExprAST> ParsePrimary() {
  400. switch (CurTok) {
  401. default:
  402. return Error("unknown token when expecting an expression");
  403. case tok_identifier:
  404. return ParseIdentifierExpr();
  405. case tok_number:
  406. return ParseNumberExpr();
  407. case '(':
  408. return ParseParenExpr();
  409. case tok_if:
  410. return ParseIfExpr();
  411. case tok_for:
  412. return ParseForExpr();
  413. case tok_var:
  414. return ParseVarExpr();
  415. }
  416. }
  417. /// unary
  418. /// ::= primary
  419. /// ::= '!' unary
  420. static std::unique_ptr<ExprAST> ParseUnary() {
  421. // If the current token is not an operator, it must be a primary expr.
  422. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
  423. return ParsePrimary();
  424. // If this is a unary operator, read it.
  425. int Opc = CurTok;
  426. getNextToken();
  427. if (auto Operand = ParseUnary())
  428. return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
  429. return nullptr;
  430. }
  431. /// binoprhs
  432. /// ::= ('+' unary)*
  433. static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
  434. std::unique_ptr<ExprAST> LHS) {
  435. // If this is a binop, find its precedence.
  436. while (1) {
  437. int TokPrec = GetTokPrecedence();
  438. // If this is a binop that binds at least as tightly as the current binop,
  439. // consume it, otherwise we are done.
  440. if (TokPrec < ExprPrec)
  441. return LHS;
  442. // Okay, we know this is a binop.
  443. int BinOp = CurTok;
  444. getNextToken(); // eat binop
  445. // Parse the unary expression after the binary operator.
  446. auto RHS = ParseUnary();
  447. if (!RHS)
  448. return nullptr;
  449. // If BinOp binds less tightly with RHS than the operator after RHS, let
  450. // the pending operator take RHS as its LHS.
  451. int NextPrec = GetTokPrecedence();
  452. if (TokPrec < NextPrec) {
  453. RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
  454. if (!RHS)
  455. return nullptr;
  456. }
  457. // Merge LHS/RHS.
  458. LHS =
  459. llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
  460. }
  461. }
  462. /// expression
  463. /// ::= unary binoprhs
  464. ///
  465. static std::unique_ptr<ExprAST> ParseExpression() {
  466. auto LHS = ParseUnary();
  467. if (!LHS)
  468. return nullptr;
  469. return ParseBinOpRHS(0, std::move(LHS));
  470. }
  471. /// prototype
  472. /// ::= id '(' id* ')'
  473. /// ::= binary LETTER number? (id, id)
  474. /// ::= unary LETTER (id)
  475. static std::unique_ptr<PrototypeAST> ParsePrototype() {
  476. std::string FnName;
  477. unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
  478. unsigned BinaryPrecedence = 30;
  479. switch (CurTok) {
  480. default:
  481. return ErrorP("Expected function name in prototype");
  482. case tok_identifier:
  483. FnName = IdentifierStr;
  484. Kind = 0;
  485. getNextToken();
  486. break;
  487. case tok_unary:
  488. getNextToken();
  489. if (!isascii(CurTok))
  490. return ErrorP("Expected unary operator");
  491. FnName = "unary";
  492. FnName += (char)CurTok;
  493. Kind = 1;
  494. getNextToken();
  495. break;
  496. case tok_binary:
  497. getNextToken();
  498. if (!isascii(CurTok))
  499. return ErrorP("Expected binary operator");
  500. FnName = "binary";
  501. FnName += (char)CurTok;
  502. Kind = 2;
  503. getNextToken();
  504. // Read the precedence if present.
  505. if (CurTok == tok_number) {
  506. if (NumVal < 1 || NumVal > 100)
  507. return ErrorP("Invalid precedecnce: must be 1..100");
  508. BinaryPrecedence = (unsigned)NumVal;
  509. getNextToken();
  510. }
  511. break;
  512. }
  513. if (CurTok != '(')
  514. return ErrorP("Expected '(' in prototype");
  515. std::vector<std::string> ArgNames;
  516. while (getNextToken() == tok_identifier)
  517. ArgNames.push_back(IdentifierStr);
  518. if (CurTok != ')')
  519. return ErrorP("Expected ')' in prototype");
  520. // success.
  521. getNextToken(); // eat ')'.
  522. // Verify right number of names for operator.
  523. if (Kind && ArgNames.size() != Kind)
  524. return ErrorP("Invalid number of operands for operator");
  525. return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
  526. BinaryPrecedence);
  527. }
  528. /// definition ::= 'def' prototype expression
  529. static std::unique_ptr<FunctionAST> ParseDefinition() {
  530. getNextToken(); // eat def.
  531. auto Proto = ParsePrototype();
  532. if (!Proto)
  533. return nullptr;
  534. if (auto E = ParseExpression())
  535. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  536. return nullptr;
  537. }
  538. /// toplevelexpr ::= expression
  539. static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
  540. if (auto E = ParseExpression()) {
  541. // Make an anonymous proto.
  542. auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
  543. std::vector<std::string>());
  544. return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
  545. }
  546. return nullptr;
  547. }
  548. /// external ::= 'extern' prototype
  549. static std::unique_ptr<PrototypeAST> ParseExtern() {
  550. getNextToken(); // eat extern.
  551. return ParsePrototype();
  552. }
  553. //===----------------------------------------------------------------------===//
  554. // Code Generation
  555. //===----------------------------------------------------------------------===//
  556. static std::unique_ptr<Module> TheModule;
  557. static IRBuilder<> Builder(getGlobalContext());
  558. static std::map<std::string, AllocaInst *> NamedValues;
  559. static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
  560. static std::unique_ptr<KaleidoscopeJIT> TheJIT;
  561. static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
  562. Value *ErrorV(const char *Str) {
  563. Error(Str);
  564. return nullptr;
  565. }
  566. Function *getFunction(std::string Name) {
  567. // First, see if the function has already been added to the current module.
  568. if (auto *F = TheModule->getFunction(Name))
  569. return F;
  570. // If not, check whether we can codegen the declaration from some existing
  571. // prototype.
  572. auto FI = FunctionProtos.find(Name);
  573. if (FI != FunctionProtos.end())
  574. return FI->second->codegen();
  575. // If no existing prototype exists, return null.
  576. return nullptr;
  577. }
  578. /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
  579. /// the function. This is used for mutable variables etc.
  580. static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
  581. const std::string &VarName) {
  582. IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
  583. TheFunction->getEntryBlock().begin());
  584. return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr,
  585. VarName.c_str());
  586. }
  587. Value *NumberExprAST::codegen() {
  588. return ConstantFP::get(getGlobalContext(), APFloat(Val));
  589. }
  590. Value *VariableExprAST::codegen() {
  591. // Look this variable up in the function.
  592. Value *V = NamedValues[Name];
  593. if (!V)
  594. return ErrorV("Unknown variable name");
  595. // Load the value.
  596. return Builder.CreateLoad(V, Name.c_str());
  597. }
  598. Value *UnaryExprAST::codegen() {
  599. Value *OperandV = Operand->codegen();
  600. if (!OperandV)
  601. return nullptr;
  602. Function *F = getFunction(std::string("unary") + Opcode);
  603. if (!F)
  604. return ErrorV("Unknown unary operator");
  605. return Builder.CreateCall(F, OperandV, "unop");
  606. }
  607. Value *BinaryExprAST::codegen() {
  608. // Special case '=' because we don't want to emit the LHS as an expression.
  609. if (Op == '=') {
  610. // Assignment requires the LHS to be an identifier.
  611. // This assume we're building without RTTI because LLVM builds that way by
  612. // default. If you build LLVM with RTTI this can be changed to a
  613. // dynamic_cast for automatic error checking.
  614. VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
  615. if (!LHSE)
  616. return ErrorV("destination of '=' must be a variable");
  617. // Codegen the RHS.
  618. Value *Val = RHS->codegen();
  619. if (!Val)
  620. return nullptr;
  621. // Look up the name.
  622. Value *Variable = NamedValues[LHSE->getName()];
  623. if (!Variable)
  624. return ErrorV("Unknown variable name");
  625. Builder.CreateStore(Val, Variable);
  626. return Val;
  627. }
  628. Value *L = LHS->codegen();
  629. Value *R = RHS->codegen();
  630. if (!L || !R)
  631. return nullptr;
  632. switch (Op) {
  633. case '+':
  634. return Builder.CreateFAdd(L, R, "addtmp");
  635. case '-':
  636. return Builder.CreateFSub(L, R, "subtmp");
  637. case '*':
  638. return Builder.CreateFMul(L, R, "multmp");
  639. case '<':
  640. L = Builder.CreateFCmpULT(L, R, "cmptmp");
  641. // Convert bool 0/1 to double 0.0 or 1.0
  642. return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
  643. "booltmp");
  644. default:
  645. break;
  646. }
  647. // If it wasn't a builtin binary operator, it must be a user defined one. Emit
  648. // a call to it.
  649. Function *F = getFunction(std::string("binary") + Op);
  650. assert(F && "binary operator not found!");
  651. Value *Ops[] = {L, R};
  652. return Builder.CreateCall(F, Ops, "binop");
  653. }
  654. Value *CallExprAST::codegen() {
  655. // Look up the name in the global module table.
  656. Function *CalleeF = getFunction(Callee);
  657. if (!CalleeF)
  658. return ErrorV("Unknown function referenced");
  659. // If argument mismatch error.
  660. if (CalleeF->arg_size() != Args.size())
  661. return ErrorV("Incorrect # arguments passed");
  662. std::vector<Value *> ArgsV;
  663. for (unsigned i = 0, e = Args.size(); i != e; ++i) {
  664. ArgsV.push_back(Args[i]->codegen());
  665. if (!ArgsV.back())
  666. return nullptr;
  667. }
  668. return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
  669. }
  670. Value *IfExprAST::codegen() {
  671. Value *CondV = Cond->codegen();
  672. if (!CondV)
  673. return nullptr;
  674. // Convert condition to a bool by comparing equal to 0.0.
  675. CondV = Builder.CreateFCmpONE(
  676. CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");
  677. Function *TheFunction = Builder.GetInsertBlock()->getParent();
  678. // Create blocks for the then and else cases. Insert the 'then' block at the
  679. // end of the function.
  680. BasicBlock *ThenBB =
  681. BasicBlock::Create(getGlobalContext(), "then", TheFunction);
  682. BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
  683. BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
  684. Builder.CreateCondBr(CondV, ThenBB, ElseBB);
  685. // Emit then value.
  686. Builder.SetInsertPoint(ThenBB);
  687. Value *ThenV = Then->codegen();
  688. if (!ThenV)
  689. return nullptr;
  690. Builder.CreateBr(MergeBB);
  691. // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
  692. ThenBB = Builder.GetInsertBlock();
  693. // Emit else block.
  694. TheFunction->getBasicBlockList().push_back(ElseBB);
  695. Builder.SetInsertPoint(ElseBB);
  696. Value *ElseV = Else->codegen();
  697. if (!ElseV)
  698. return nullptr;
  699. Builder.CreateBr(MergeBB);
  700. // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
  701. ElseBB = Builder.GetInsertBlock();
  702. // Emit merge block.
  703. TheFunction->getBasicBlockList().push_back(MergeBB);
  704. Builder.SetInsertPoint(MergeBB);
  705. PHINode *PN =
  706. Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");
  707. PN->addIncoming(ThenV, ThenBB);
  708. PN->addIncoming(ElseV, ElseBB);
  709. return PN;
  710. }
  711. // Output for-loop as:
  712. // var = alloca double
  713. // ...
  714. // start = startexpr
  715. // store start -> var
  716. // goto loop
  717. // loop:
  718. // ...
  719. // bodyexpr
  720. // ...
  721. // loopend:
  722. // step = stepexpr
  723. // endcond = endexpr
  724. //
  725. // curvar = load var
  726. // nextvar = curvar + step
  727. // store nextvar -> var
  728. // br endcond, loop, endloop
  729. // outloop:
  730. Value *ForExprAST::codegen() {
  731. Function *TheFunction = Builder.GetInsertBlock()->getParent();
  732. // Create an alloca for the variable in the entry block.
  733. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
  734. // Emit the start code first, without 'variable' in scope.
  735. Value *StartVal = Start->codegen();
  736. if (!StartVal)
  737. return nullptr;
  738. // Store the value into the alloca.
  739. Builder.CreateStore(StartVal, Alloca);
  740. // Make the new basic block for the loop header, inserting after current
  741. // block.
  742. BasicBlock *LoopBB =
  743. BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
  744. // Insert an explicit fall through from the current block to the LoopBB.
  745. Builder.CreateBr(LoopBB);
  746. // Start insertion in LoopBB.
  747. Builder.SetInsertPoint(LoopBB);
  748. // Within the loop, the variable is defined equal to the PHI node. If it
  749. // shadows an existing variable, we have to restore it, so save it now.
  750. AllocaInst *OldVal = NamedValues[VarName];
  751. NamedValues[VarName] = Alloca;
  752. // Emit the body of the loop. This, like any other expr, can change the
  753. // current BB. Note that we ignore the value computed by the body, but don't
  754. // allow an error.
  755. if (!Body->codegen())
  756. return nullptr;
  757. // Emit the step value.
  758. Value *StepVal = nullptr;
  759. if (Step) {
  760. StepVal = Step->codegen();
  761. if (!StepVal)
  762. return nullptr;
  763. } else {
  764. // If not specified, use 1.0.
  765. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
  766. }
  767. // Compute the end condition.
  768. Value *EndCond = End->codegen();
  769. if (!EndCond)
  770. return nullptr;
  771. // Reload, increment, and restore the alloca. This handles the case where
  772. // the body of the loop mutates the variable.
  773. Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
  774. Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
  775. Builder.CreateStore(NextVar, Alloca);
  776. // Convert condition to a bool by comparing equal to 0.0.
  777. EndCond = Builder.CreateFCmpONE(
  778. EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");
  779. // Create the "after loop" block and insert it.
  780. BasicBlock *AfterBB =
  781. BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
  782. // Insert the conditional branch into the end of LoopEndBB.
  783. Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
  784. // Any new code will be inserted in AfterBB.
  785. Builder.SetInsertPoint(AfterBB);
  786. // Restore the unshadowed variable.
  787. if (OldVal)
  788. NamedValues[VarName] = OldVal;
  789. else
  790. NamedValues.erase(VarName);
  791. // for expr always returns 0.0.
  792. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
  793. }
  794. Value *VarExprAST::codegen() {
  795. std::vector<AllocaInst *> OldBindings;
  796. Function *TheFunction = Builder.GetInsertBlock()->getParent();
  797. // Register all variables and emit their initializer.
  798. for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
  799. const std::string &VarName = VarNames[i].first;
  800. ExprAST *Init = VarNames[i].second.get();
  801. // Emit the initializer before adding the variable to scope, this prevents
  802. // the initializer from referencing the variable itself, and permits stuff
  803. // like this:
  804. // var a = 1 in
  805. // var a = a in ... # refers to outer 'a'.
  806. Value *InitVal;
  807. if (Init) {
  808. InitVal = Init->codegen();
  809. if (!InitVal)
  810. return nullptr;
  811. } else { // If not specified, use 0.0.
  812. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
  813. }
  814. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
  815. Builder.CreateStore(InitVal, Alloca);
  816. // Remember the old variable binding so that we can restore the binding when
  817. // we unrecurse.
  818. OldBindings.push_back(NamedValues[VarName]);
  819. // Remember this binding.
  820. NamedValues[VarName] = Alloca;
  821. }
  822. // Codegen the body, now that all vars are in scope.
  823. Value *BodyVal = Body->codegen();
  824. if (!BodyVal)
  825. return nullptr;
  826. // Pop all our variables from scope.
  827. for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
  828. NamedValues[VarNames[i].first] = OldBindings[i];
  829. // Return the body computation.
  830. return BodyVal;
  831. }
  832. Function *PrototypeAST::codegen() {
  833. // Make the function type: double(double,double) etc.
  834. std::vector<Type *> Doubles(Args.size(),
  835. Type::getDoubleTy(getGlobalContext()));
  836. FunctionType *FT =
  837. FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
  838. Function *F =
  839. Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
  840. // Set names for all arguments.
  841. unsigned Idx = 0;
  842. for (auto &Arg : F->args())
  843. Arg.setName(Args[Idx++]);
  844. return F;
  845. }
  846. Function *FunctionAST::codegen() {
  847. // Transfer ownership of the prototype to the FunctionProtos map, but keep a
  848. // reference to it for use below.
  849. auto &P = *Proto;
  850. FunctionProtos[Proto->getName()] = std::move(Proto);
  851. Function *TheFunction = getFunction(P.getName());
  852. if (!TheFunction)
  853. return nullptr;
  854. // If this is an operator, install it.
  855. if (P.isBinaryOp())
  856. BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
  857. // Create a new basic block to start insertion into.
  858. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
  859. Builder.SetInsertPoint(BB);
  860. // Record the function arguments in the NamedValues map.
  861. NamedValues.clear();
  862. for (auto &Arg : TheFunction->args()) {
  863. // Create an alloca for this variable.
  864. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
  865. // Store the initial value into the alloca.
  866. Builder.CreateStore(&Arg, Alloca);
  867. // Add arguments to variable symbol table.
  868. NamedValues[Arg.getName()] = Alloca;
  869. }
  870. if (Value *RetVal = Body->codegen()) {
  871. // Finish off the function.
  872. Builder.CreateRet(RetVal);
  873. // Validate the generated code, checking for consistency.
  874. verifyFunction(*TheFunction);
  875. // Run the optimizer on the function.
  876. TheFPM->run(*TheFunction);
  877. return TheFunction;
  878. }
  879. // Error reading body, remove function.
  880. TheFunction->eraseFromParent();
  881. if (P.isBinaryOp())
  882. BinopPrecedence.erase(Proto->getOperatorName());
  883. return nullptr;
  884. }
  885. //===----------------------------------------------------------------------===//
  886. // Top-Level parsing and JIT Driver
  887. //===----------------------------------------------------------------------===//
  888. static void InitializeModuleAndPassManager() {
  889. // Open a new module.
  890. TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
  891. TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
  892. // Create a new pass manager attached to it.
  893. TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
  894. // Do simple "peephole" optimizations and bit-twiddling optzns.
  895. TheFPM->add(createInstructionCombiningPass());
  896. // Reassociate expressions.
  897. TheFPM->add(createReassociatePass());
  898. // Eliminate Common SubExpressions.
  899. TheFPM->add(createGVNPass());
  900. // Simplify the control flow graph (deleting unreachable blocks, etc).
  901. TheFPM->add(createCFGSimplificationPass());
  902. TheFPM->doInitialization();
  903. }
  904. static void HandleDefinition() {
  905. if (auto FnAST = ParseDefinition()) {
  906. if (auto *FnIR = FnAST->codegen()) {
  907. fprintf(stderr, "Read function definition:");
  908. FnIR->dump();
  909. TheJIT->addModule(std::move(TheModule));
  910. InitializeModuleAndPassManager();
  911. }
  912. } else {
  913. // Skip token for error recovery.
  914. getNextToken();
  915. }
  916. }
  917. static void HandleExtern() {
  918. if (auto ProtoAST = ParseExtern()) {
  919. if (auto *FnIR = ProtoAST->codegen()) {
  920. fprintf(stderr, "Read extern: ");
  921. FnIR->dump();
  922. FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
  923. }
  924. } else {
  925. // Skip token for error recovery.
  926. getNextToken();
  927. }
  928. }
  929. static void HandleTopLevelExpression() {
  930. // Evaluate a top-level expression into an anonymous function.
  931. if (auto FnAST = ParseTopLevelExpr()) {
  932. if (FnAST->codegen()) {
  933. // JIT the module containing the anonymous expression, keeping a handle so
  934. // we can free it later.
  935. auto H = TheJIT->addModule(std::move(TheModule));
  936. InitializeModuleAndPassManager();
  937. // Search the JIT for the __anon_expr symbol.
  938. auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
  939. assert(ExprSymbol && "Function not found");
  940. // Get the symbol's address and cast it to the right type (takes no
  941. // arguments, returns a double) so we can call it as a native function.
  942. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
  943. fprintf(stderr, "Evaluated to %f\n", FP());
  944. // Delete the anonymous expression module from the JIT.
  945. TheJIT->removeModule(H);
  946. }
  947. } else {
  948. // Skip token for error recovery.
  949. getNextToken();
  950. }
  951. }
  952. /// top ::= definition | external | expression | ';'
  953. static void MainLoop() {
  954. while (1) {
  955. fprintf(stderr, "ready> ");
  956. switch (CurTok) {
  957. case tok_eof:
  958. return;
  959. case ';': // ignore top-level semicolons.
  960. getNextToken();
  961. break;
  962. case tok_def:
  963. HandleDefinition();
  964. break;
  965. case tok_extern:
  966. HandleExtern();
  967. break;
  968. default:
  969. HandleTopLevelExpression();
  970. break;
  971. }
  972. }
  973. }
  974. //===----------------------------------------------------------------------===//
  975. // "Library" functions that can be "extern'd" from user code.
  976. //===----------------------------------------------------------------------===//
  977. /// putchard - putchar that takes a double and returns 0.
  978. extern "C" double putchard(double X) {
  979. fputc((char)X, stderr);
  980. return 0;
  981. }
  982. /// printd - printf that takes a double prints it as "%f\n", returning 0.
  983. extern "C" double printd(double X) {
  984. fprintf(stderr, "%f\n", X);
  985. return 0;
  986. }
  987. //===----------------------------------------------------------------------===//
  988. // Main driver code.
  989. //===----------------------------------------------------------------------===//
  990. int main() {
  991. InitializeNativeTarget();
  992. InitializeNativeTargetAsmPrinter();
  993. InitializeNativeTargetAsmParser();
  994. // Install standard binary operators.
  995. // 1 is lowest precedence.
  996. BinopPrecedence['='] = 2;
  997. BinopPrecedence['<'] = 10;
  998. BinopPrecedence['+'] = 20;
  999. BinopPrecedence['-'] = 20;
  1000. BinopPrecedence['*'] = 40; // highest.
  1001. // Prime the first token.
  1002. fprintf(stderr, "ready> ");
  1003. getNextToken();
  1004. TheJIT = llvm::make_unique<KaleidoscopeJIT>();
  1005. InitializeModuleAndPassManager();
  1006. // Run the main "interpreter loop" now.
  1007. MainLoop();
  1008. return 0;
  1009. }