Acse.y 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. %{
  2. /*
  3. * Andrea Di Biagio
  4. * Politecnico di Milano, 2007
  5. *
  6. * Acse.y
  7. * Formal Languages & Compilers Machine, 2007/2008
  8. *
  9. */
  10. /*************************************************************************
  11. Compiler for the language LANCE
  12. ***************************************************************************/
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <assert.h>
  16. #include "axe_struct.h"
  17. #include "axe_engine.h"
  18. #include "symbol_table.h"
  19. #include "axe_errors.h"
  20. #include "collections.h"
  21. #include "axe_expressions.h"
  22. #include "axe_gencode.h"
  23. #include "axe_utils.h"
  24. #include "axe_array.h"
  25. #include "axe_cflow_graph.h"
  26. #include "cflow_constants.h"
  27. #include "axe_transform.h"
  28. #include "axe_reg_alloc.h"
  29. #include "reg_alloc_constants.h"
  30. #include "axe_io_manager.h"
  31. #ifndef NDEBUG
  32. # include "axe_debug.h"
  33. #endif
  34. /* global variables */
  35. int line_num; /* this variable will keep track of the
  36. * source code line number. Every time that a newline
  37. * is encountered while parsing the input file, this
  38. * value is increased by 1. This value is then used
  39. * for error tracking: if the parser returns an error
  40. * or a warning, this value is used in order to notify
  41. * in which line of code the error has been found */
  42. int num_error; /* the number of errors found in the code. This value
  43. * is increased by 1 every time a new error is found
  44. * in the code. */
  45. int num_warning; /* As for the `num_error' global variable, this one
  46. * keeps track of all the warning messages displayed */
  47. /* errorcode is defined inside "axe_engine.c" */
  48. extern int errorcode; /* this variable is used to test if an error is found
  49. * while parsing the input file. It also is set
  50. * to notify if the compiler internal state is invalid.
  51. * When the parsing process is started, the value
  52. * of `errorcode' is set to the value of the macro
  53. * `AXE_OK' defined in "axe_constants.h".
  54. * As long as everything (the parsed source code and
  55. * the internal state of the compiler) is correct,
  56. * the value of `errorcode' is set to `AXE_OK'.
  57. * When an error occurs (because the input file contains
  58. * one or more syntax errors or because something went
  59. * wrong in the machine internal state), the errorcode
  60. * is set to a value that is different from `AXE_OK'. */
  61. extern int cflow_errorcode; /* As for `errorcode' this value is used to
  62. * test if an error occurs during the creation process of
  63. * a control flow graph. More informations can be found
  64. * analyzing the file `axe_cflow_graph.h'. */
  65. /* program informations */
  66. t_program_infos *program; /* The singleton instance of `program'.
  67. * An instance of `t_program_infos' holds in its
  68. * internal structure, all the useful informations
  69. * about a program. For example: the assembly
  70. * (code and directives); the symbol table;
  71. * the label manager (see axe_labels.h) etc. */
  72. t_cflow_Graph *graph; /* An instance of a control flow graph. This instance
  73. * will be generated starting from `program' and will
  74. * be used during the register allocation process */
  75. t_reg_allocator *RA; /* Register allocator. It implements the "Linear scan"
  76. * algorythm */
  77. t_io_infos *file_infos; /* input and output files used by the compiler */
  78. %}
  /* Exactly one shift/reduce conflict is expected from this grammar
   * (presumably the dangling `else' of if_statement -- confirm with
   * `bison -v'). */
  %expect 1

  /*=========================================================================
                                SEMANTIC RECORDS
  =========================================================================*/

  /* Semantic value carried by tokens and non-terminals. */
  %union {
     int               intval;      /* TYPE and NUMBER tokens */
     char             *svalue;      /* IDENTIFIER token */
     t_axe_expression  expr;        /* `exp' non-terminal */
     t_axe_declaration *decl;       /* `declaration' non-terminal */
     t_list           *list;        /* `declaration_list' non-terminal */
     t_axe_label      *label;       /* DO, IF, ELSE tokens and `if_stmt' */
     t_while_statement while_stmt;  /* WHILE token */
  }
  92. /*=========================================================================
  93. TOKENS
  94. =========================================================================*/
  /* Grammar entry point. */
  %start program

  /* Punctuation and brackets. */
  %token LBRACE RBRACE LPAR RPAR LSQUARE RSQUARE
  %token SEMI COLON

  /* Arithmetic, bitwise and logical-not operators. */
  %token PLUS MINUS MUL_OP DIV_OP MOD_OP
  %token AND_OP OR_OP NOT_OP

  /* Assignment, comparison and shift operators. */
  %token ASSIGN LT GT SHL_OP SHR_OP EQ NOTEQ LTEQ GTEQ

  /* Logical operators and the comma. */
  %token ANDAND OROR
  %token COMMA

  /* Keywords that carry no semantic value. */
  %token FOR
  %token RETURN
  %token READ
  %token WRITE

  /* Keywords whose semantic value is filled in by mid-rule actions of the
   * control-flow rules (labels, or the while-statement record). */
  %token <label>      DO
  %token <while_stmt> WHILE
  %token <label>      IF
  %token <label>      ELSE

  /* Tokens carrying a value of their own. */
  %token <intval> TYPE
  %token <svalue> IDENTIFIER
  %token <intval> NUMBER

  /* Typed non-terminals. */
  %type <expr>  exp
  %type <decl>  declaration
  %type <list>  declaration_list
  %type <label> if_stmt
  117. /*=========================================================================
  118. OPERATOR PRECEDENCES
  119. =========================================================================*/
  /* Operators are listed from the lowest to the highest precedence;
   * operators on the same line share the same precedence level. */
  %left COMMA
  %left ASSIGN
  %left OROR
  %left ANDAND
  %left OR_OP
  %left AND_OP
  %left EQ NOTEQ
  %left LT GT LTEQ GTEQ
  %left SHL_OP SHR_OP
  %left MINUS PLUS
  %left MUL_OP DIV_OP
  /* The logical-not token is declared as NOT_OP in the %token list.
   * Naming an undeclared `NOT' here would make bison create an extra,
   * unused token and leave NOT_OP without any precedence. */
  %right NOT_OP
  132. /*=========================================================================
  133. BISON GRAMMAR
  134. =========================================================================*/
  135. %%
  /* `program' is the start symbol of the grammar.
   * A program is made of:
   *   1. zero or more variable declarations;
   *   2. a list of statements (at least one statement).
   * When this rule is reduced, the end of the program is notified to the
   * `program' singleton instance. */
  program
    : var_declarations statements
      {
         /* Notify the end of the program: `set_end_Program' introduces,
          * if necessary, a `HALT' instruction into the list of
          * instructions. */
         set_end_Program(program);

         /* leave yyparse() reporting success */
         YYACCEPT;
      }
    ;
  /* Zero or more variable declarations; nothing is done here because each
   * single `var_declaration' registers its own variables. */
  var_declarations
    : var_declarations var_declaration  { /* no semantic action */ }
    | /* empty */                       { /* no semantic action */ }
    ;
  /* One declaration statement: a type followed by a list of declarators. */
  var_declaration
    : TYPE declaration_list SEMI
      {
         /* register the newly declared variables ($2), of type $1,
          * into the program information */
         set_new_variables(program, $1, $2);
      }
    ;
  /* Comma-separated list of declarators, collected into a t_list. */
  declaration_list
    : declaration_list COMMA declaration
      {
         /* add the new declaration to the existing list
          * (addElement is called with -1 as position argument) */
         $$ = addElement($1, $3, -1);
      }
    | declaration
      {
         /* first declaration: start the list from NULL */
         $$ = addElement(NULL, $1, -1);
      }
    ;
  /* A single declarator: a scalar with an initial value, an array with a
   * size, or a plain scalar. Each alternative builds a t_axe_declaration
   * and reports AXE_OUT_OF_MEMORY when the allocation returns NULL. */
  declaration
    : IDENTIFIER ASSIGN NUMBER
      {
         /* scalar with initial value $3 */
         $$ = alloc_declaration($1, 0, 0, $3);
         if ($$ == NULL) {
            notifyError(AXE_OUT_OF_MEMORY);
         }
      }
    | IDENTIFIER LSQUARE NUMBER RSQUARE
      {
         /* array of $3 elements */
         $$ = alloc_declaration($1, 1, $3, 0);
         if ($$ == NULL) {
            notifyError(AXE_OUT_OF_MEMORY);
         }
      }
    | IDENTIFIER
      {
         /* plain scalar, no explicit initial value */
         $$ = alloc_declaration($1, 0, 0, 0);
         if ($$ == NULL) {
            notifyError(AXE_OUT_OF_MEMORY);
         }
      }
    ;
  /* A code block is either one statement or a sequence of statements
   * enclosed between braces. */
  code_block
    : statement                 { /* no semantic action */ }
    | LBRACE statements RBRACE  { /* no semantic action */ }
    ;
  /* A non-empty sequence of statements. */
  statements
    : statements statement  { /* no semantic action */ }
    | statement             { /* no semantic action */ }
    ;
  /* A statement is an assignment, a control statement, a read/write
   * statement, or a lone semicolon (which emits a NOP instruction). */
  statement
    : assign_statement SEMI      { /* no semantic action */ }
    | control_statement          { /* no semantic action */ }
    | read_write_statement SEMI  { /* no semantic action */ }
    | SEMI                       { gen_nop_instruction(program); }
    ;
  /* Control statements; do-while and return need a trailing semicolon,
   * if and while do not. */
  control_statement
    : if_statement             { /* no semantic action */ }
    | while_statement          { /* no semantic action */ }
    | do_while_statement SEMI  { /* no semantic action */ }
    | return_statement SEMI    { /* no semantic action */ }
    ;
  /* Input/output statements. */
  read_write_statement
    : read_statement   { /* no semantic action */ }
    | write_statement  { /* no semantic action */ }
    ;
  /* Assignment to an array element or to a scalar variable.
   * In both alternatives the IDENTIFIER string is released with free():
   * according to the original note it is created by `strdup' in the
   * lexer (see Acse.lex). */
  assign_statement
    : IDENTIFIER LSQUARE exp RSQUARE ASSIGN exp
      {
         /* Notify to `program' that the value $6 has to be assigned to
          * the location addressed by $1[$3]: $1 is the array/pointer
          * identifier and $3 is the expression used as index. */
         storeArrayElement(program, $1, $3, $6);

         /* free the memory associated with the IDENTIFIER */
         free($1);
      }
    | IDENTIFIER ASSIGN exp
      {
         int location;

         /* Look up the symbol table to find where (i.e. in which
          * register) the variable named $1 is located. */
         location = get_symbol_location(program, $1, 0);

         /* update the value held at `location' */
         if ($3.expression_type == IMMEDIATE) {
            gen_move_immediate(program, location, $3.value);
         } else {
            /* register source: location = REG_0 + $3.value
             * (REG_0 is presumably the always-zero register, making
             * this a plain copy -- confirm) */
            gen_add_instruction
               (program, location, REG_0, $3.value, CG_DIRECT_ALL);
         }

         /* free the memory associated with the IDENTIFIER */
         free($1);
      }
    ;
  /* `if' with an optional `else' branch.
   * $1 (from if_stmt) is the label reached when the condition is false.
   * In the if-else form, the ELSE token slot ($2) holds the label placed
   * after the whole statement. */
  if_statement
    : if_stmt
      {
         /* no else branch: the false-condition label is fixed here */
         assignLabel(program, $1);
      }
    | if_stmt ELSE
      {
         /* reserve the label marking the end of the if-else */
         $2 = newLabel(program);

         /* end of the `then' branch: jump to the end label so that the
          * else branch is skipped */
         gen_bt_instruction (program, $2, 0);

         /* the else branch starts here: fix the false-condition label */
         assignLabel(program, $1);
      }
      code_block
      {
         /* fix the end label right after the else branch */
         assignLabel(program, $2);
      }
    ;
  /* Condition and `then' branch of an if statement. Its semantic value is
   * the label to jump to when the condition does not hold; the label is
   * left to be fixed by `if_statement'. */
  if_stmt
    : IF
      {
         /* reserve the label used when `exp' is not verified; it is kept
          * in the semantic value of the IF token */
         $1 = newLabel(program);
      }
      LPAR exp RPAR
      {
         /* emit an instruction on the condition value before the
          * conditional branch that follows */
         if ($4.expression_type != IMMEDIATE) {
            gen_andb_instruction(program, $4.value,
                                 $4.value, $4.value, CG_DIRECT_ALL);
         } else {
            gen_load_immediate(program, $4.value);
         }

         /* if `exp' is FALSE, jump to the label $1 */
         gen_beq_instruction (program, $1, 0);
      }
      code_block
      {
         /* hand the false-condition label over to if_statement */
         $$ = $1;
      }
    ;
  /* while loop. The semantic value of the WHILE token ($1) stores the two
   * labels of the loop: `label_condition' (where the condition is
   * evaluated) and `label_end' (first instruction after the body). */
  while_statement
    : WHILE
      {
         /* initialize the while-statement record */
         $1 = create_while_statement();

         /* reserve a new label and fix it here, just before the code
          * that evaluates the condition */
         $1.label_condition = assignNewLabel(program);
      }
      LPAR exp RPAR
      {
         /* emit an instruction on the condition value before the
          * conditional branch that follows */
         if ($4.expression_type != IMMEDIATE) {
            gen_andb_instruction(program, $4.value,
                                 $4.value, $4.value, CG_DIRECT_ALL);
         } else {
            gen_load_immediate(program, $4.value);
         }

         /* reserve the label that will point to the first instruction
          * after the loop body */
         $1.label_end = newLabel(program);

         /* if `exp' is FALSE, leave the loop */
         gen_beq_instruction (program, $1.label_end, 0);
      }
      code_block
      {
         /* go back to the evaluation of the condition */
         gen_bt_instruction (program, $1.label_condition, 0);

         /* fix `label_end' right after the loop */
         assignLabel(program, $1.label_end);
      }
    ;
  /* do-while loop. The semantic value of the DO token ($1) holds the
   * label placed at the beginning of the loop body. */
  do_while_statement
    : DO
      {
         /* reserve the label of the loop start ... */
         $1 = newLabel(program);

         /* ... and fix it here, before the body */
         assignLabel(program, $1);
      }
      code_block WHILE LPAR exp RPAR
      {
         /* emit an instruction on the condition value before the
          * conditional branch that follows */
         if ($6.expression_type != IMMEDIATE) {
            gen_andb_instruction(program, $6.value,
                                 $6.value, $6.value, CG_DIRECT_ALL);
         } else {
            gen_load_immediate(program, $6.value);
         }

         /* if `exp' is TRUE, jump back to the loop start ($1) */
         gen_bne_instruction (program, $1, 0);
      }
    ;
  /* `return' stops the program: it is translated into a HALT. */
  return_statement
    : RETURN
      {
         /* emit a HALT instruction */
         gen_halt_instruction(program);
      }
    ;
  /* read(ID): read an integer value from standard input and assign it to
   * the variable associated with the given identifier. */
  read_statement
    : READ LPAR IDENTIFIER RPAR
      {
         int location;

         /* query the symbol table for the register location associated
          * with the IDENTIFIER $3 */
         location = get_symbol_location(program, $3, 0);

         /* emit the read instruction targeting that location */
         gen_read_instruction (program, location);

         /* release the identifier string */
         free($3);
      }
    ;
  /* write(exp): write an integer value to standard output. */
  write_statement
    : WRITE LPAR exp RPAR
      {
         /* a register expression is written straight from its register */
         int location = $3.value;

         if ($3.expression_type == IMMEDIATE) {
            /* an immediate must first be loaded into a new register;
             * gen_load_immediate returns the new register identifier
             * or REG_INVALID if an error occurs */
            location = gen_load_immediate(program, $3.value);
         }

         /* emit the write instruction */
         gen_write_instruction (program, location);
      }
    ;
  /* Expressions. The semantic value is a t_axe_expression that is either
   * an IMMEDIATE (constant value) or a REGISTER (register identifier).
   * Binary operators are delegated to `handle_bin_numeric_op' and
   * `handle_binary_comparison'. */
  exp
    : NUMBER
      {
         $$ = create_expression ($1, IMMEDIATE);
      }
    | IDENTIFIER
      {
         int location;

         /* fetch the location of the symbol with the given ID */
         location = get_symbol_location(program, $1, 0);

         /* the value of `exp' is the register location of IDENTIFIER */
         $$ = create_expression (location, REGISTER);

         /* release the identifier string */
         free($1);
      }
    | IDENTIFIER LSQUARE exp RSQUARE
      {
         int reg;

         /* load the value of IDENTIFIER[exp]; the call returns the
          * register used for the expression below */
         reg = loadArrayElement(program, $1, $3);

         /* build the resulting expression */
         $$ = create_expression (reg, REGISTER);

         /* release the identifier string */
         free($1);
      }
    | NOT_OP NUMBER
      {
         /* logical not of a constant, folded at compile time:
          * !0 is 1, anything else gives 0 */
         $$ = create_expression (($2 == 0) ? 1 : 0, IMMEDIATE);
      }
    | NOT_OP IDENTIFIER
      {
         int identifier_location;
         int output_register;

         /* fetch the location of the symbol with the given ID */
         identifier_location = get_symbol_location(program, $2, 0);

         /* ask for a free register that will receive the result of the
          * NOT instruction */
         output_register = getNewRegister(program);

         /* emit the NOT instruction */
         gen_notl_instruction (program, output_register,
                               identifier_location);

         $$ = create_expression (output_register, REGISTER);

         /* release the identifier string */
         free($2);
      }

    /* bitwise operators */
    | exp AND_OP exp  { $$ = handle_bin_numeric_op(program, $1, $3, ANDB); }
    | exp OR_OP exp   { $$ = handle_bin_numeric_op(program, $1, $3, ORB); }

    /* arithmetic operators */
    | exp PLUS exp    { $$ = handle_bin_numeric_op(program, $1, $3, ADD); }
    | exp MINUS exp   { $$ = handle_bin_numeric_op(program, $1, $3, SUB); }
    | exp MUL_OP exp  { $$ = handle_bin_numeric_op(program, $1, $3, MUL); }
    | exp DIV_OP exp  { $$ = handle_bin_numeric_op(program, $1, $3, DIV); }

    /* comparisons */
    | exp LT exp      { $$ = handle_binary_comparison (program, $1, $3, _LT_); }
    | exp GT exp      { $$ = handle_binary_comparison (program, $1, $3, _GT_); }
    | exp EQ exp      { $$ = handle_binary_comparison (program, $1, $3, _EQ_); }
    | exp NOTEQ exp   { $$ = handle_binary_comparison (program, $1, $3, _NOTEQ_); }
    | exp LTEQ exp    { $$ = handle_binary_comparison (program, $1, $3, _LTEQ_); }
    | exp GTEQ exp    { $$ = handle_binary_comparison (program, $1, $3, _GTEQ_); }

    /* shifts */
    | exp SHL_OP exp  { $$ = handle_bin_numeric_op(program, $1, $3, SHL); }
    | exp SHR_OP exp  { $$ = handle_bin_numeric_op(program, $1, $3, SHR); }

    /* logical operators */
    | exp ANDAND exp  { $$ = handle_bin_numeric_op(program, $1, $3, ANDL); }
    | exp OROR exp    { $$ = handle_bin_numeric_op(program, $1, $3, ORL); }

    /* parenthesised expression */
    | LPAR exp RPAR   { $$ = $2; }

    /* unary minus */
    | MINUS exp
      {
         if ($2.expression_type == IMMEDIATE)
         {
            /* constant operand: negate it at compile time */
            $$ = $2;
            $$.value = - ($$.value);
         }
         else
         {
            t_axe_expression zero_reg;

            /* register operand: compute REG_0 - exp */
            zero_reg.value = REG_0;
            zero_reg.expression_type = REGISTER;

            $$ = handle_bin_numeric_op (program, zero_reg, $2, SUB);
         }
      }
    ;
  489. %%
  490. /*=========================================================================
  491. MAIN
  492. =========================================================================*/
  493. int main (int argc, char **argv)
  494. {
  495. /* initialize all the compiler data structures and global variables */
  496. init_compiler(argc, argv);
  497. /* start the parsing procedure */
  498. yyparse();
  499. #ifndef NDEBUG
  500. fprintf(stdout, "Parsing process completed. \n");
  501. #endif
  502. /* test if the parsing process completed succesfully */
  503. checkConsistency();
  504. #ifndef NDEBUG
  505. fprintf(stdout, "Creating a control flow graph. \n");
  506. #endif
  507. /* create the control flow graph */
  508. graph = createFlowGraph(program->instructions);
  509. checkConsistency();
  510. #ifndef NDEBUG
  511. assert(program != NULL);
  512. assert(program->sy_table != NULL);
  513. assert(file_infos != NULL);
  514. assert(file_infos->syTable_output != NULL);
  515. printSymbolTable(program->sy_table, file_infos->syTable_output);
  516. printGraphInfos(graph, file_infos->cfg_1, 0);
  517. fprintf(stdout, "Updating the basic blocks. \n");
  518. #endif
  519. /* update the control flow graph by inserting load and stores inside
  520. * every basic block */
  521. graph = insertLoadAndStoreInstr(program, graph);
  522. #ifndef NDEBUG
  523. fprintf(stdout, "Executing a liveness analysis on the intermediate code \n");
  524. #endif
  525. performLivenessAnalysis(graph);
  526. checkConsistency();
  527. #ifndef NDEBUG
  528. printGraphInfos(graph, file_infos->cfg_2, 1);
  529. #endif
  530. #ifndef NDEBUG
  531. fprintf(stdout, "Starting the register allocation process. \n");
  532. #endif
  533. /* initialize the register allocator by using the control flow
  534. * informations stored into the control flow graph */
  535. RA = initializeRegAlloc(graph);
  536. /* execute the linear scan algorythm */
  537. execute_linear_scan(RA);
  538. #ifndef NDEBUG
  539. printRegAllocInfos(RA, file_infos->reg_alloc_output);
  540. #endif
  541. #ifndef NDEBUG
  542. fprintf(stdout, "Updating the control flow informations. \n");
  543. #endif
  544. /* apply changes to the program informations by using the informations
  545. * of the register allocation process */
  546. updateProgramInfos(program, graph, RA);
  547. #ifndef NDEBUG
  548. fprintf(stdout, "Writing the assembly file... \n");
  549. #endif
  550. writeAssembly(program, file_infos->output_file_name);
  551. #ifndef NDEBUG
  552. fprintf(stdout, "Assembly written on file \"%s\".\n", file_infos->output_file_name);
  553. #endif
  554. /* shutdown the compiler */
  555. shutdownCompiler(0);
  556. return 0;
  557. }
  558. /*=========================================================================
  559. YYERROR
  560. =========================================================================*/
  561. int yyerror(const char* errmsg)
  562. {
  563. errorcode = AXE_SYNTAX_ERROR;
  564. return 0;
  565. }