Acse.y 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. %{
  2. /*
  3. * Andrea Di Biagio
  4. * Politecnico di Milano, 2007
  5. *
  6. * Acse.y
  7. * Formal Languages & Compilers Machine, 2007/2008
  8. *
  9. */
  10. /*************************************************************************
  11. Compiler for the language LANCE
  12. ***************************************************************************/
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <assert.h>
  16. #include "axe_struct.h"
  17. #include "axe_engine.h"
  18. #include "symbol_table.h"
  19. #include "axe_errors.h"
  20. #include "collections.h"
  21. #include "axe_expressions.h"
  22. #include "axe_gencode.h"
  23. #include "axe_utils.h"
  24. #include "axe_array.h"
  25. #include "axe_cflow_graph.h"
  26. #include "cflow_constants.h"
  27. #include "axe_transform.h"
  28. #include "axe_reg_alloc.h"
  29. #include "reg_alloc_constants.h"
  30. #include "axe_io_manager.h"
  31. #ifndef NDEBUG
  32. # include "axe_debug.h"
  33. #endif
  34. /* global variables */
  35. int line_num; /* this variable will keep track of the
  36. * source code line number. Every time that a newline
  37. * is encountered while parsing the input file, this
  38. * value is increased by 1. This value is then used
  39. * for error tracking: if the parser returns an error
  40. * or a warning, this value is used in order to notify
  41. * in which line of code the error has been found */
  42. int num_error; /* the number of errors found in the code. This value
  43. * is increased by 1 every time a new error is found
  44. * in the code. */
  45. int num_warning; /* As for the `num_error' global variable, this one
  46. * keeps track of all the warning messages displayed */
  47. /* errorcode is defined inside "axe_engine.c" */
  48. extern int errorcode; /* this variable is used to test if an error is found
  49. * while parsing the input file. It also is set
  50. * to notify if the compiler internal state is invalid.
  51. * When the parsing process is started, the value
  52. * of `errorcode' is set to the value of the macro
  53. * `AXE_OK' defined in "axe_constants.h".
  54. * As long as everything (the parsed source code and
  55. * the internal state of the compiler) is correct,
  56. * the value of `errorcode' is set to `AXE_OK'.
  57. * When an error occurs (because the input file contains
  58. * one or more syntax errors or because something went
  59. * wrong in the machine internal state), the errorcode
  60. * is set to a value that is different from `AXE_OK'. */
  61. extern int cflow_errorcode; /* As for `errorcode' this value is used to
  62. * test if an error occurs during the creation process of
  63. * a control flow graph. More informations can be found
  64. * analyzing the file `axe_cflow_graph.h'. */
  65. /* program informations */
  66. t_program_infos *program; /* The singleton instance of `program'.
  67. * An instance of `t_program_infos' holds in its
  68. * internal structure, all the useful informations
  69. * about a program. For example: the assembly
  70. * (code and directives); the symbol table;
  71. * the label manager (see axe_labels.h) etc. */
  72. t_cflow_Graph *graph; /* An instance of a control flow graph. This instance
  73. * will be generated starting from `program' and will
  74. * be used during the register allocation process */
  75. t_reg_allocator *RA; /* Register allocator. It implements the "Linear scan"
  76. * algorythm */
  77. t_io_infos *file_infos; /* input and output files used by the compiler */
  78. %}
  79. %expect 2
  80. /*=========================================================================
  81. SEMANTIC RECORDS
  82. =========================================================================*/
  83. %union {
  84. int intval;
  85. char *svalue;
  86. t_axe_expression expr;
  87. t_axe_declaration *decl;
  88. t_list *list;
  89. t_axe_label *label;
  90. t_while_statement while_stmt;
  91. }
  92. /*=========================================================================
  93. TOKENS
  94. =========================================================================*/
  95. %start program
  96. %token LBRACE RBRACE LPAR RPAR LSQUARE RSQUARE
  97. %token SEMI COLON PLUS MINUS MUL_OP DIV_OP MOD_OP
  98. %token AND_OP OR_OP NOT_OP
  99. %token ASSIGN LT GT SHL_OP SHR_OP EQ NOTEQ LTEQ GTEQ
  100. %token ANDAND OROR
  101. %token COMMA
  102. %token FOR
  103. %token RETURN
  104. %token READ
  105. %token WRITE
  106. %token <label> DO
  107. %token <while_stmt> WHILE
  108. %token <label> IF
  109. %token <label> IIF
  110. %token <label> ELSE
  111. %token <intval> TYPE
  112. %token <svalue> IDENTIFIER
  113. %token <intval> NUMBER
  114. %type <expr> exp
  115. %type <decl> declaration
  116. %type <list> declaration_list
  117. %type <label> if_stmt
  118. %type <label> init_if_stmt
  119. /*=========================================================================
  120. OPERATOR PRECEDENCES
  121. =========================================================================*/
  122. %left COMMA
  123. %left ASSIGN
  124. %left OROR
  125. %left ANDAND
  126. %left OR_OP
  127. %left AND_OP
  128. %left EQ NOTEQ
  129. %left LT GT LTEQ GTEQ
  130. %left SHL_OP SHR_OP
  131. %left MINUS PLUS
  132. %left MUL_OP DIV_OP
  133. %right NOT
  134. /*=========================================================================
  135. BISON GRAMMAR
  136. =========================================================================*/
  137. %%
  138. /* `program' is the starting non-terminal of the grammar.
  139. * A program is composed by:
  140. 1. declarations (zero or more);
  141. 2. A list of instructions. (at least one instruction!).
  142. * When the rule associated with the non-terminal `program' is executed,
  143. * the parser notify it to the `program' singleton instance. */
  144. program : var_declarations statements
  145. {
  146. /* Notify the end of the program. Once called
  147. * the function `set_end_Program' - if necessary -
  148. * introduces a `HALT' instruction into the
  149. * list of instructions. */
  150. set_end_Program(program);
  151. /* return from yyparse() */
  152. YYACCEPT;
  153. }
  154. ;
  155. var_declarations : var_declarations var_declaration { /* does nothing */ }
  156. | /* empty */ { /* does nothing */ }
  157. ;
  158. var_declaration : TYPE declaration_list SEMI
  159. {
  160. /* update the program infos by adding new variables */
  161. set_new_variables(program, $1, $2);
  162. }
  163. ;
  164. declaration_list : declaration_list COMMA declaration
  165. { /* add the new declaration to the list of declarations */
  166. $$ = addElement($1, $3, -1);
  167. }
  168. | declaration
  169. {
  170. /* add the new declaration to the list of declarations */
  171. $$ = addElement(NULL, $1, -1);
  172. }
  173. ;
  174. declaration : IDENTIFIER ASSIGN NUMBER
  175. {
  176. /* create a new instance of t_axe_declaration */
  177. $$ = alloc_declaration($1, 0, 0, $3);
  178. /* test if an `out of memory' occurred */
  179. if ($$ == NULL)
  180. notifyError(AXE_OUT_OF_MEMORY);
  181. }
  182. | IDENTIFIER LSQUARE NUMBER RSQUARE
  183. {
  184. /* create a new instance of t_axe_declaration */
  185. $$ = alloc_declaration($1, 1, $3, 0);
  186. /* test if an `out of memory' occurred */
  187. if ($$ == NULL)
  188. notifyError(AXE_OUT_OF_MEMORY);
  189. }
  190. | IDENTIFIER
  191. {
  192. /* create a new instance of t_axe_declaration */
  193. $$ = alloc_declaration($1, 0, 0, 0);
  194. /* test if an `out of memory' occurred */
  195. if ($$ == NULL)
  196. notifyError(AXE_OUT_OF_MEMORY);
  197. }
  198. ;
  199. /* A block of code can be either a single statement or
  200. * a set of statements enclosed between braces */
  201. code_block : statement { /* does nothing */ }
  202. | LBRACE statements RBRACE { /* does nothing */ }
  203. ;
  204. /* One or more code statements */
  205. statements : statements statement { /* does nothing */ }
  206. | statement { /* does nothing */ }
  207. ;
  208. /* A statement can be either an assignment statement or a control statement
  209. * or a read/write statement or a semicolon */
  210. statement : assign_statement SEMI { /* does nothing */ }
  211. | control_statement { /* does nothing */ }
  212. | read_write_statement SEMI { /* does nothing */ }
  213. | SEMI { gen_nop_instruction(program); }
  214. ;
  215. control_statement : if_statement { /* does nothing */ }
  216. | init_if_statement { /* does nothing */ }
  217. | while_statement { /* does nothing */ }
  218. | do_while_statement SEMI { /* does nothing */ }
  219. | return_statement SEMI { /* does nothing */ }
  220. ;
  221. read_write_statement : read_statement { /* does nothing */ }
  222. | write_statement { /* does nothing */ }
  223. ;
  224. assign_statement : IDENTIFIER LSQUARE exp RSQUARE ASSIGN exp
  225. {
  226. /* Notify to `program' that the value $6
  227. * have to be assigned to the location
  228. * addressed by $1[$3]. Where $1 is obviously
  229. * the array/pointer identifier, $3 is an expression
  230. * that holds an integer value. That value will be
  231. * used as an index for the array $1 */
  232. storeArrayElement(program, $1, $3, $6);
  233. /* free the memory associated with the IDENTIFIER.
  234. * The use of the free instruction is required
  235. * because of the value associated with IDENTIFIER.
  236. * The value of IDENTIFIER is a string created
  237. * by a call to the function `strdup' (see Acse.lex) */
  238. free($1);
  239. }
  240. | IDENTIFIER ASSIGN exp
  241. {
  242. int location;
  243. t_axe_instruction *instr;
  244. /* in order to assign a value to a variable, we have to
  245. * know where the variable is located (i.e. in which register).
  246. * the function `get_symbol_location' is used in order
  247. * to retrieve the register location assigned to
  248. * a given identifier.
  249. * A symbol table keeps track of the location of every
  250. * declared variable.
  251. * `get_symbol_location' perform a query on the symbol table
  252. * in order to discover the correct location of
  253. * the variable with $1 as identifier */
  254. /* get the location of the symbol with the given ID. */
  255. location = get_symbol_location(program, $1, 0);
  256. /* update the value of location */
  257. if ($3.expression_type == IMMEDIATE)
  258. gen_move_immediate(program, location, $3.value);
  259. else
  260. instr = gen_add_instruction
  261. (program, location, REG_0, $3.value, CG_DIRECT_ALL);
  262. /* free the memory associated with the IDENTIFIER */
  263. free($1);
  264. }
  265. ;
  266. init_if_statement : init_if_stmt
  267. {
  268. assignLabel(program, $1);
  269. }
  270. | init_if_stmt ELSE
  271. {
  272. $2 = newLabel(program);
  273. gen_bt_instruction(program, $2, 0);
  274. assignLabel(program, $1);
  275. }
  276. code_block
  277. {
  278. assignLabel(program, $2);
  279. }
  280. ;
  281. if_statement : if_stmt
  282. {
  283. /* fix the `label_else' */
  284. assignLabel(program, $1);
  285. }
  286. | if_stmt ELSE
  287. {
  288. /* reserve a new label that points to the address where to jump if
  289. * `exp' is verified */
  290. $2 = newLabel(program);
  291. /* exit from the if-else */
  292. gen_bt_instruction (program, $2, 0);
  293. /* fix the `label_else' */
  294. assignLabel(program, $1);
  295. }
  296. code_block
  297. {
  298. /* fix the `label_else' */
  299. assignLabel(program, $2);
  300. }
  301. ;
  302. if_stmt : IF
  303. {
  304. /* the label that points to the address where to jump if
  305. * `exp' is not verified */
  306. $1 = newLabel(program);
  307. }
  308. LPAR exp RPAR
  309. {
  310. if ($4.expression_type == IMMEDIATE)
  311. gen_load_immediate(program, $4.value);
  312. else
  313. gen_andb_instruction(program, $4.value,
  314. $4.value, $4.value, CG_DIRECT_ALL);
  315. /* if `exp' returns FALSE, jump to the label $1 */
  316. gen_beq_instruction (program, $1, 0);
  317. }
  318. code_block { $$ = $1; }
  319. ;
  320. init_if_stmt : IIF
  321. {
  322. $1 = newLabel(program);
  323. }
  324. LPAR assign_statement SEMI exp RPAR
  325. {
  326. if ($6.expression_type == IMMEDIATE)
  327. gen_load_immediate(program, $6.value);
  328. else
  329. gen_andb_instruction(program, $6.value, $6.value,
  330. $6.value, CG_DIRECT_ALL);
  331. gen_beq_instruction(program, $1, 0);
  332. }
  333. code_block
  334. { $$ = $1; }
  335. ;
  336. while_statement : WHILE
  337. {
  338. /* initialize the value of the non-terminal */
  339. $1 = create_while_statement();
  340. /* reserve and fix a new label */
  341. $1.label_condition
  342. = assignNewLabel(program);
  343. }
  344. LPAR exp RPAR
  345. {
  346. if ($4.expression_type == IMMEDIATE)
  347. gen_load_immediate(program, $4.value);
  348. else
  349. gen_andb_instruction(program, $4.value,
  350. $4.value, $4.value, CG_DIRECT_ALL);
  351. /* reserve a new label. This new label will point
  352. * to the first instruction after the while code
  353. * block */
  354. $1.label_end = newLabel(program);
  355. /* if `exp' returns FALSE, jump to the label $1.label_end */
  356. gen_beq_instruction (program, $1.label_end, 0);
  357. }
  358. code_block
  359. {
  360. /* jump to the beginning of the loop */
  361. gen_bt_instruction
  362. (program, $1.label_condition, 0);
  363. /* fix the label `label_end' */
  364. assignLabel(program, $1.label_end);
  365. }
  366. ;
  367. do_while_statement : DO
  368. {
  369. /* the label that points to the address where to jump if
  370. * `exp' is not verified */
  371. $1 = newLabel(program);
  372. /* fix the label */
  373. assignLabel(program, $1);
  374. }
  375. code_block WHILE LPAR exp RPAR
  376. {
  377. if ($6.expression_type == IMMEDIATE)
  378. gen_load_immediate(program, $6.value);
  379. else
  380. gen_andb_instruction(program, $6.value,
  381. $6.value, $6.value, CG_DIRECT_ALL);
  382. /* if `exp' returns TRUE, jump to the label $1 */
  383. gen_bne_instruction (program, $1, 0);
  384. }
  385. ;
  386. return_statement : RETURN
  387. {
  388. /* insert an HALT instruction */
  389. gen_halt_instruction(program);
  390. }
  391. ;
  392. read_statement : READ LPAR IDENTIFIER RPAR
  393. {
  394. int location;
  395. /* read from standard input an integer value and assign
  396. * it to a variable associated with the given identifier */
  397. /* get the location of the symbol with the given ID */
  398. /* lookup the symbol table and fetch the register location
  399. * associated with the IDENTIFIER $3. */
  400. location = get_symbol_location(program, $3, 0);
  401. /* insert a read instruction */
  402. gen_read_instruction (program, location);
  403. /* free the memory associated with the IDENTIFIER */
  404. free($3);
  405. }
  406. ;
  407. write_statement : WRITE LPAR exp RPAR
  408. {
  409. int location;
  410. if ($3.expression_type == IMMEDIATE)
  411. {
  412. /* load `immediate' into a new register. Returns the new register
  413. * identifier or REG_INVALID if an error occurs */
  414. location = gen_load_immediate(program, $3.value);
  415. }
  416. else
  417. location = $3.value;
  418. /* write to standard output an integer value */
  419. gen_write_instruction (program, location);
  420. }
  421. ;
  422. exp: NUMBER { $$ = create_expression ($1, IMMEDIATE); }
  423. | IDENTIFIER {
  424. int location;
  425. /* get the location of the symbol with the given ID */
  426. location = get_symbol_location(program, $1, 0);
  427. /* return the register location of IDENTIFIER as
  428. * a value for `exp' */
  429. $$ = create_expression (location, REGISTER);
  430. /* free the memory associated with the IDENTIFIER */
  431. free($1);
  432. }
  433. | IDENTIFIER LSQUARE exp RSQUARE {
  434. int reg;
  435. /* load the value IDENTIFIER[exp]
  436. * into `arrayElement' */
  437. reg = loadArrayElement(program, $1, $3);
  438. /* create a new expression */
  439. $$ = create_expression (reg, REGISTER);
  440. /* free the memory associated with the IDENTIFIER */
  441. free($1);
  442. }
  443. | NOT_OP NUMBER { if ($2 == 0)
  444. $$ = create_expression (1, IMMEDIATE);
  445. else
  446. $$ = create_expression (0, IMMEDIATE);
  447. }
  448. | NOT_OP IDENTIFIER {
  449. int identifier_location;
  450. int output_register;
  451. /* get the location of the symbol with the given ID */
  452. identifier_location =
  453. get_symbol_location(program, $2, 0);
  454. /* generate a NOT instruction. In order to do this,
  455. * at first we have to ask for a free register where
  456. * to store the result of the NOT instruction. */
  457. output_register = getNewRegister(program);
  458. /* Now we are able to generate a NOT instruction */
  459. gen_notl_instruction (program, output_register
  460. , identifier_location);
  461. $$ = create_expression (output_register, REGISTER);
  462. /* free the memory associated with the IDENTIFIER */
  463. free($2);
  464. }
  465. | exp AND_OP exp {
  466. $$ = handle_bin_numeric_op(program, $1, $3, ANDB);
  467. }
  468. | exp OR_OP exp {
  469. $$ = handle_bin_numeric_op(program, $1, $3, ORB);
  470. }
  471. | exp PLUS exp {
  472. $$ = handle_bin_numeric_op(program, $1, $3, ADD);
  473. }
  474. | exp MINUS exp {
  475. $$ = handle_bin_numeric_op(program, $1, $3, SUB);
  476. }
  477. | exp MUL_OP exp {
  478. $$ = handle_bin_numeric_op(program, $1, $3, MUL);
  479. }
  480. | exp DIV_OP exp {
  481. $$ = handle_bin_numeric_op(program, $1, $3, DIV);
  482. }
  483. | exp LT exp {
  484. $$ = handle_binary_comparison (program, $1, $3, _LT_);
  485. }
  486. | exp GT exp {
  487. $$ = handle_binary_comparison (program, $1, $3, _GT_);
  488. }
  489. | exp EQ exp {
  490. $$ = handle_binary_comparison (program, $1, $3, _EQ_);
  491. }
  492. | exp NOTEQ exp {
  493. $$ = handle_binary_comparison (program, $1, $3, _NOTEQ_);
  494. }
  495. | exp LTEQ exp {
  496. $$ = handle_binary_comparison (program, $1, $3, _LTEQ_);
  497. }
  498. | exp GTEQ exp {
  499. $$ = handle_binary_comparison (program, $1, $3, _GTEQ_);
  500. }
  501. | exp SHL_OP exp { $$ = handle_bin_numeric_op(program, $1, $3, SHL); }
  502. | exp SHR_OP exp { $$ = handle_bin_numeric_op(program, $1, $3, SHR); }
  503. | exp ANDAND exp { $$ = handle_bin_numeric_op(program, $1, $3, ANDL); }
  504. | exp OROR exp { $$ = handle_bin_numeric_op(program, $1, $3, ORL); }
  505. | LPAR exp RPAR { $$ = $2; }
  506. | MINUS exp {
  507. if ($2.expression_type == IMMEDIATE)
  508. {
  509. $$ = $2;
  510. $$.value = - ($$.value);
  511. }
  512. else
  513. {
  514. t_axe_expression exp_r0;
  515. /* create an expression for regisrer REG_0 */
  516. exp_r0.value = REG_0;
  517. exp_r0.expression_type = REGISTER;
  518. $$ = handle_bin_numeric_op
  519. (program, exp_r0, $2, SUB);
  520. }
  521. }
  522. ;
  523. %%
  524. /*=========================================================================
  525. MAIN
  526. =========================================================================*/
  527. int main (int argc, char **argv)
  528. {
  529. /* initialize all the compiler data structures and global variables */
  530. init_compiler(argc, argv);
  531. /* start the parsing procedure */
  532. yyparse();
  533. #ifndef NDEBUG
  534. fprintf(stdout, "Parsing process completed. \n");
  535. #endif
  536. /* test if the parsing process completed succesfully */
  537. checkConsistency();
  538. #ifndef NDEBUG
  539. fprintf(stdout, "Creating a control flow graph. \n");
  540. #endif
  541. /* create the control flow graph */
  542. graph = createFlowGraph(program->instructions);
  543. checkConsistency();
  544. #ifndef NDEBUG
  545. assert(program != NULL);
  546. assert(program->sy_table != NULL);
  547. assert(file_infos != NULL);
  548. assert(file_infos->syTable_output != NULL);
  549. printSymbolTable(program->sy_table, file_infos->syTable_output);
  550. printGraphInfos(graph, file_infos->cfg_1, 0);
  551. fprintf(stdout, "Updating the basic blocks. \n");
  552. #endif
  553. /* update the control flow graph by inserting load and stores inside
  554. * every basic block */
  555. graph = insertLoadAndStoreInstr(program, graph);
  556. #ifndef NDEBUG
  557. fprintf(stdout, "Executing a liveness analysis on the intermediate code \n");
  558. #endif
  559. performLivenessAnalysis(graph);
  560. checkConsistency();
  561. #ifndef NDEBUG
  562. printGraphInfos(graph, file_infos->cfg_2, 1);
  563. #endif
  564. #ifndef NDEBUG
  565. fprintf(stdout, "Starting the register allocation process. \n");
  566. #endif
  567. /* initialize the register allocator by using the control flow
  568. * informations stored into the control flow graph */
  569. RA = initializeRegAlloc(graph);
  570. /* execute the linear scan algorythm */
  571. execute_linear_scan(RA);
  572. #ifndef NDEBUG
  573. printRegAllocInfos(RA, file_infos->reg_alloc_output);
  574. #endif
  575. #ifndef NDEBUG
  576. fprintf(stdout, "Updating the control flow informations. \n");
  577. #endif
  578. /* apply changes to the program informations by using the informations
  579. * of the register allocation process */
  580. updateProgramInfos(program, graph, RA);
  581. #ifndef NDEBUG
  582. fprintf(stdout, "Writing the assembly file... \n");
  583. #endif
  584. writeAssembly(program, file_infos->output_file_name);
  585. #ifndef NDEBUG
  586. fprintf(stdout, "Assembly written on file \"%s\".\n", file_infos->output_file_name);
  587. #endif
  588. /* shutdown the compiler */
  589. shutdownCompiler(0);
  590. return 0;
  591. }
  592. /*=========================================================================
  593. YYERROR
  594. =========================================================================*/
  595. int yyerror(const char* errmsg)
  596. {
  597. errorcode = AXE_SYNTAX_ERROR;
  598. return 0;
  599. }