Acse.y 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. %{
  2. /*
  3. * Andrea Di Biagio
  4. * Politecnico di Milano, 2007
  5. *
  6. * Acse.y
  7. * Formal Languages & Compilers Machine, 2007/2008
  8. *
  9. */
  10. /*************************************************************************
  11. Compiler for the language LANCE
  12. ***************************************************************************/
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <assert.h>
  16. #include "axe_struct.h"
  17. #include "axe_engine.h"
  18. #include "symbol_table.h"
  19. #include "axe_errors.h"
  20. #include "collections.h"
  21. #include "axe_expressions.h"
  22. #include "axe_gencode.h"
  23. #include "axe_utils.h"
  24. #include "axe_array.h"
  25. #include "axe_cflow_graph.h"
  26. #include "cflow_constants.h"
  27. #include "axe_transform.h"
  28. #include "axe_reg_alloc.h"
  29. #include "reg_alloc_constants.h"
  30. #include "axe_io_manager.h"
  31. #ifndef NDEBUG
  32. # include "axe_debug.h"
  33. #endif
/* global variables */
int line_num;        /* Tracks the current source-code line number. It is
                      * increased by 1 every time a newline is encountered
                      * while parsing the input file, and is used for error
                      * tracking: when the parser reports an error or a
                      * warning, this value tells on which line of code the
                      * problem was found. */
int num_error;       /* The number of errors found in the code. It is
                      * increased by 1 every time a new error is found. */
int num_warning;     /* Like `num_error', but counts the warning messages
                      * that have been displayed. */

/* errorcode is defined inside "axe_engine.c" */
extern int errorcode;   /* Used to test whether an error was found while
                         * parsing the input file. It is also set to signal
                         * that the compiler's internal state is invalid.
                         * When parsing starts, `errorcode' is set to the
                         * value of the macro `AXE_OK' defined in
                         * "axe_constants.h". As long as everything (the
                         * parsed source code and the internal state of the
                         * compiler) is correct, `errorcode' stays `AXE_OK'.
                         * When an error occurs (because the input file
                         * contains one or more syntax errors, or because
                         * something went wrong in the internal state), it
                         * is set to a value different from `AXE_OK'. */
extern int cflow_errorcode;   /* Like `errorcode', but used to test whether
                               * an error occurred while a control flow
                               * graph was being created. More information
                               * can be found in `axe_cflow_graph.h'. */

/* program information */
t_program_infos *program;   /* The singleton instance of `program'.
                             * A `t_program_infos' holds all the useful
                             * information about a program, for example:
                             * the assembly (code and directives); the
                             * symbol table; the label manager (see
                             * axe_labels.h); etc. */
t_cflow_Graph *graph;       /* An instance of a control flow graph. It is
                             * generated starting from `program' and is
                             * used during register allocation. */
t_reg_allocator *RA;        /* Register allocator. It implements the
                             * "linear scan" algorithm. */
t_io_infos *file_infos;     /* Input and output files used by the compiler. */
  78. %}
  79. %expect 1
  80. /*=========================================================================
  81. SEMANTIC RECORDS
  82. =========================================================================*/
  83. %union {
  84. int intval;
  85. char *svalue;
  86. t_axe_expression expr;
  87. t_axe_declaration *decl;
  88. t_list *list;
  89. t_axe_label *label;
  90. t_while_statement while_stmt;
  91. }
  92. /*=========================================================================
  93. TOKENS
  94. =========================================================================*/
  95. %start program
  96. %token LBRACE RBRACE LPAR RPAR LSQUARE RSQUARE
  97. %token SEMI COLON PLUS MINUS MUL_OP DIV_OP MOD_OP
  98. %token AND_OP OR_OP NOT_OP
  99. %token ASSIGN LT GT SHL_OP SHR_OP EQ NOTEQ LTEQ GTEQ
  100. %token ANDAND OROR
  101. %token COMMA
  102. %token FOR
  103. %token RETURN
  104. %token READ
  105. %token WRITE
  106. %token MERGE
  107. %token <label> DO
  108. %token <while_stmt> WHILE
  109. %token <label> IF
  110. %token <label> ELSE
  111. %token <intval> TYPE
  112. %token <svalue> IDENTIFIER
  113. %token <intval> NUMBER
  114. %type <expr> exp
  115. %type <decl> declaration
  116. %type <list> declaration_list
  117. %type <label> if_stmt
  118. /*=========================================================================
  119. OPERATOR PRECEDENCES
  120. =========================================================================*/
  121. %left COMMA
  122. %left ASSIGN
  123. %nonassoc MERGE
  124. %left OROR
  125. %left ANDAND
  126. %left OR_OP
  127. %left AND_OP
  128. %left EQ NOTEQ
  129. %left LT GT LTEQ GTEQ
  130. %left SHL_OP SHR_OP
  131. %left MINUS PLUS
  132. %left MUL_OP DIV_OP
  133. %right NOT
  134. /*=========================================================================
  135. BISON GRAMMAR
  136. =========================================================================*/
  137. %%
/* `program' is the start symbol of the grammar.
 * A program is composed of:
 *   1. declarations (zero or more);
 *   2. a list of instructions (at least one instruction!).
 * When the rule associated with the non-terminal `program' is reduced,
 * the parser notifies the `program' singleton instance. */
program : var_declarations statements
          {
             /* Notify the end of the program. Once called, the function
              * `set_end_Program' - if necessary - introduces a `HALT'
              * instruction into the list of instructions. */
             set_end_Program(program);

             /* return from yyparse() */
             YYACCEPT;
          }
;
/* Zero or more declaration lines; the work is done per line below. */
var_declarations : var_declarations var_declaration   { /* does nothing */ }
                 | /* empty */                        { /* does nothing */ }
;

/* One declaration line: a type followed by a comma-separated list of
 * declarators, e.g. `TYPE a, b = 3, c[10];'. */
var_declaration : TYPE declaration_list SEMI
                  {
                     /* update the program infos by adding the new variables:
                      * $1 is the type code, $2 the list of declarations */
                     set_new_variables(program, $1, $2);
                  }
;

/* Builds the list of declarations of one declaration line.
 * NOTE(review): the position argument -1 presumably means "append at the
 * end of the list" -- confirm against `addElement' in collections.h. */
declaration_list : declaration_list COMMA declaration
                   {  /* add the new declaration to the existing list */
                      $$ = addElement($1, $3, -1);
                   }
                 | declaration
                   {
                      /* start a new list holding the first declaration */
                      $$ = addElement(NULL, $1, -1);
                   }
;
  174. declaration : IDENTIFIER ASSIGN NUMBER
  175. {
  176. /* create a new instance of t_axe_declaration */
  177. $$ = alloc_declaration($1, 0, 0, $3);
  178. /* test if an `out of memory' occurred */
  179. if ($$ == NULL)
  180. notifyError(AXE_OUT_OF_MEMORY);
  181. }
  182. | IDENTIFIER LSQUARE NUMBER RSQUARE
  183. {
  184. /* create a new instance of t_axe_declaration */
  185. $$ = alloc_declaration($1, 1, $3, 0);
  186. /* test if an `out of memory' occurred */
  187. if ($$ == NULL)
  188. notifyError(AXE_OUT_OF_MEMORY);
  189. }
  190. | IDENTIFIER
  191. {
  192. /* create a new instance of t_axe_declaration */
  193. $$ = alloc_declaration($1, 0, 0, 0);
  194. /* test if an `out of memory' occurred */
  195. if ($$ == NULL)
  196. notifyError(AXE_OUT_OF_MEMORY);
  197. }
  198. ;
/* A block of code can be either a single statement or
 * a sequence of statements enclosed in braces. */
code_block : statement                  { /* does nothing */ }
           | LBRACE statements RBRACE   { /* does nothing */ }
;

/* One or more code statements */
statements : statements statement   { /* does nothing */ }
           | statement              { /* does nothing */ }
;

/* A statement can be an assignment statement, a control statement,
 * a read/write statement, or a lone semicolon (the empty statement). */
statement : assign_statement SEMI       { /* does nothing */ }
          | control_statement           { /* does nothing */ }
          | read_write_statement SEMI   { /* does nothing */ }
          | SEMI                        { gen_nop_instruction(program); } /* empty statement emits a NOP */
;

/* `if' and `while' end with their code block; `do ... while (...)' and
 * `return' must be terminated by a semicolon. */
control_statement : if_statement            { /* does nothing */ }
                  | while_statement         { /* does nothing */ }
                  | do_while_statement SEMI { /* does nothing */ }
                  | return_statement SEMI   { /* does nothing */ }
;

read_write_statement : read_statement   { /* does nothing */ }
                     | write_statement  { /* does nothing */ }
;
  223. assign_statement : IDENTIFIER LSQUARE exp RSQUARE ASSIGN exp
  224. {
  225. /* Notify to `program' that the value $6
  226. * have to be assigned to the location
  227. * addressed by $1[$3]. Where $1 is obviously
  228. * the array/pointer identifier, $3 is an expression
  229. * that holds an integer value. That value will be
  230. * used as an index for the array $1 */
  231. storeArrayElement(program, $1, $3, $6);
  232. /* free the memory associated with the IDENTIFIER.
  233. * The use of the free instruction is required
  234. * because of the value associated with IDENTIFIER.
  235. * The value of IDENTIFIER is a string created
  236. * by a call to the function `strdup' (see Acse.lex) */
  237. free($1);
  238. }
  239. | IDENTIFIER ASSIGN exp
  240. {
  241. int location;
  242. t_axe_instruction *instr;
  243. /* in order to assign a value to a variable, we have to
  244. * know where the variable is located (i.e. in which register).
  245. * the function `get_symbol_location' is used in order
  246. * to retrieve the register location assigned to
  247. * a given identifier.
  248. * A symbol table keeps track of the location of every
  249. * declared variable.
  250. * `get_symbol_location' perform a query on the symbol table
  251. * in order to discover the correct location of
  252. * the variable with $1 as identifier */
  253. /* get the location of the symbol with the given ID. */
  254. location = get_symbol_location(program, $1, 0);
  255. /* update the value of location */
  256. if ($3.expression_type == IMMEDIATE)
  257. gen_move_immediate(program, location, $3.value);
  258. else
  259. instr = gen_add_instruction
  260. (program, location, REG_0, $3.value, CG_DIRECT_ALL);
  261. /* free the memory associated with the IDENTIFIER */
  262. free($1);
  263. }
  264. ;
/* `if' with or without an `else' branch. The value of `if_stmt' ($1) is
 * the label its conditional branch jumps to when the condition is false. */
if_statement : if_stmt
               {
                  /* no else branch: the "condition false" label lands
                   * right after the then-block */
                  assignLabel(program, $1);
               }
             | if_stmt ELSE
               {
                  /* reserve the end-of-if label; it is kept in the
                   * semantic value of the ELSE token */
                  $2 = newLabel(program);

                  /* the then-block has just been emitted: jump over
                   * the else-block */
                  gen_bt_instruction (program, $2, 0);

                  /* the else-block starts here: fix the "condition
                   * false" label */
                  assignLabel(program, $1);
               }
               code_block
               {
                  /* fix the end-of-if label after the else-block */
                  assignLabel(program, $2);
               }
;
  286. if_stmt : IF
  287. {
  288. /* the label that points to the address where to jump if
  289. * `exp' is not verified */
  290. $1 = newLabel(program);
  291. }
  292. LPAR exp RPAR
  293. {
  294. if ($4.expression_type == IMMEDIATE)
  295. gen_load_immediate(program, $4.value);
  296. else
  297. gen_andb_instruction(program, $4.value,
  298. $4.value, $4.value, CG_DIRECT_ALL);
  299. /* if `exp' returns FALSE, jump to the label $1 */
  300. gen_beq_instruction (program, $1, 0);
  301. }
  302. code_block { $$ = $1; }
  303. ;
  304. while_statement : WHILE
  305. {
  306. /* initialize the value of the non-terminal */
  307. $1 = create_while_statement();
  308. /* reserve and fix a new label */
  309. $1.label_condition
  310. = assignNewLabel(program);
  311. }
  312. LPAR exp RPAR
  313. {
  314. if ($4.expression_type == IMMEDIATE)
  315. gen_load_immediate(program, $4.value);
  316. else
  317. gen_andb_instruction(program, $4.value,
  318. $4.value, $4.value, CG_DIRECT_ALL);
  319. /* reserve a new label. This new label will point
  320. * to the first instruction after the while code
  321. * block */
  322. $1.label_end = newLabel(program);
  323. /* if `exp' returns FALSE, jump to the label $1.label_end */
  324. gen_beq_instruction (program, $1.label_end, 0);
  325. }
  326. code_block
  327. {
  328. /* jump to the beginning of the loop */
  329. gen_bt_instruction
  330. (program, $1.label_condition, 0);
  331. /* fix the label `label_end' */
  332. assignLabel(program, $1.label_end);
  333. }
  334. ;
  335. do_while_statement : DO
  336. {
  337. /* the label that points to the address where to jump if
  338. * `exp' is not verified */
  339. $1 = newLabel(program);
  340. /* fix the label */
  341. assignLabel(program, $1);
  342. }
  343. code_block WHILE LPAR exp RPAR
  344. {
  345. if ($6.expression_type == IMMEDIATE)
  346. gen_load_immediate(program, $6.value);
  347. else
  348. gen_andb_instruction(program, $6.value,
  349. $6.value, $6.value, CG_DIRECT_ALL);
  350. /* if `exp' returns TRUE, jump to the label $1 */
  351. gen_bne_instruction (program, $1, 0);
  352. }
  353. ;
/* `return' takes no value: it simply stops the program. */
return_statement : RETURN
                   {
                      /* insert a HALT instruction */
                      gen_halt_instruction(program);
                   }
;
  360. read_statement : READ LPAR IDENTIFIER RPAR
  361. {
  362. int location;
  363. /* read from standard input an integer value and assign
  364. * it to a variable associated with the given identifier */
  365. /* get the location of the symbol with the given ID */
  366. /* lookup the symbol table and fetch the register location
  367. * associated with the IDENTIFIER $3. */
  368. location = get_symbol_location(program, $3, 0);
  369. /* insert a read instruction */
  370. gen_read_instruction (program, location);
  371. /* free the memory associated with the IDENTIFIER */
  372. free($3);
  373. }
  374. ;
  375. write_statement : WRITE LPAR exp RPAR
  376. {
  377. int location;
  378. if ($3.expression_type == IMMEDIATE)
  379. {
  380. /* load `immediate' into a new register. Returns the new register
  381. * identifier or REG_INVALID if an error occurs */
  382. location = gen_load_immediate(program, $3.value);
  383. }
  384. else
  385. location = $3.value;
  386. /* write to standard output an integer value */
  387. gen_write_instruction (program, location);
  388. }
  389. ;
  390. exp: NUMBER { $$ = create_expression ($1, IMMEDIATE); }
  391. | IDENTIFIER {
  392. int location;
  393. /* get the location of the symbol with the given ID */
  394. location = get_symbol_location(program, $1, 0);
  395. /* return the register location of IDENTIFIER as
  396. * a value for `exp' */
  397. $$ = create_expression (location, REGISTER);
  398. /* free the memory associated with the IDENTIFIER */
  399. free($1);
  400. }
  401. | IDENTIFIER LSQUARE exp RSQUARE {
  402. int reg;
  403. /* load the value IDENTIFIER[exp]
  404. * into `arrayElement' */
  405. reg = loadArrayElement(program, $1, $3);
  406. /* create a new expression */
  407. $$ = create_expression (reg, REGISTER);
  408. /* free the memory associated with the IDENTIFIER */
  409. free($1);
  410. }
  411. | NOT_OP NUMBER { if ($2 == 0)
  412. $$ = create_expression (1, IMMEDIATE);
  413. else
  414. $$ = create_expression (0, IMMEDIATE);
  415. }
  416. | NOT_OP IDENTIFIER {
  417. int identifier_location;
  418. int output_register;
  419. /* get the location of the symbol with the given ID */
  420. identifier_location =
  421. get_symbol_location(program, $2, 0);
  422. /* generate a NOT instruction. In order to do this,
  423. * at first we have to ask for a free register where
  424. * to store the result of the NOT instruction. */
  425. output_register = getNewRegister(program);
  426. /* Now we are able to generate a NOT instruction */
  427. gen_notl_instruction (program, output_register
  428. , identifier_location);
  429. $$ = create_expression (output_register, REGISTER);
  430. /* free the memory associated with the IDENTIFIER */
  431. free($2);
  432. }
  433. | exp AND_OP exp {
  434. $$ = handle_bin_numeric_op(program, $1, $3, ANDB);
  435. }
  436. | exp OR_OP exp {
  437. $$ = handle_bin_numeric_op(program, $1, $3, ORB);
  438. }
  439. | exp PLUS exp {
  440. $$ = handle_bin_numeric_op(program, $1, $3, ADD);
  441. }
  442. | exp MINUS exp {
  443. $$ = handle_bin_numeric_op(program, $1, $3, SUB);
  444. }
  445. | exp MUL_OP exp {
  446. $$ = handle_bin_numeric_op(program, $1, $3, MUL);
  447. }
  448. | exp DIV_OP exp {
  449. $$ = handle_bin_numeric_op(program, $1, $3, DIV);
  450. }
  451. | exp LT exp {
  452. $$ = handle_binary_comparison (program, $1, $3, _LT_);
  453. }
  454. | exp GT exp {
  455. $$ = handle_binary_comparison (program, $1, $3, _GT_);
  456. }
  457. | exp EQ exp {
  458. $$ = handle_binary_comparison (program, $1, $3, _EQ_);
  459. }
  460. | exp NOTEQ exp {
  461. $$ = handle_binary_comparison (program, $1, $3, _NOTEQ_);
  462. }
  463. | exp LTEQ exp {
  464. $$ = handle_binary_comparison (program, $1, $3, _LTEQ_);
  465. }
  466. | exp GTEQ exp {
  467. $$ = handle_binary_comparison (program, $1, $3, _GTEQ_);
  468. }
  469. | exp SHL_OP exp { $$ = handle_bin_numeric_op(program, $1, $3, SHL); }
  470. | exp SHR_OP exp { $$ = handle_bin_numeric_op(program, $1, $3, SHR); }
  471. | exp ANDAND exp { $$ = handle_bin_numeric_op(program, $1, $3, ANDL); }
  472. | exp OROR exp { $$ = handle_bin_numeric_op(program, $1, $3, ORL); }
  473. | LPAR exp RPAR { $$ = $2; }
  474. | MINUS exp {
  475. if ($2.expression_type == IMMEDIATE)
  476. {
  477. $$ = $2;
  478. $$.value = - ($$.value);
  479. }
  480. else
  481. {
  482. t_axe_expression exp_r0;
  483. /* create an expression for regisrer REG_0 */
  484. exp_r0.value = REG_0;
  485. exp_r0.expression_type = REGISTER;
  486. $$ = handle_bin_numeric_op
  487. (program, exp_r0, $2, SUB);
  488. }
  489. }
  490. | MERGE exp COMMA exp COMMA exp
  491. {
  492. t_axe_expression tmp = handle_bin_numeric_op(program, $2, $6, MUL);
  493. t_axe_expression zero = create_expression(0, IMMEDIATE);
  494. t_axe_expression inv = handle_binary_comparison(program, $6, zero,
  495. _EQ_);
  496. t_axe_expression tmp2 = handle_bin_numeric_op(program, $4, inv, MUL);
  497. $$ = handle_bin_numeric_op(program, tmp, tmp2, ORB);
  498. }
  499. ;
  500. %%
  501. /*=========================================================================
  502. MAIN
  503. =========================================================================*/
  504. int main (int argc, char **argv)
  505. {
  506. /* initialize all the compiler data structures and global variables */
  507. init_compiler(argc, argv);
  508. /* start the parsing procedure */
  509. yyparse();
  510. #ifndef NDEBUG
  511. fprintf(stdout, "Parsing process completed. \n");
  512. #endif
  513. /* test if the parsing process completed succesfully */
  514. checkConsistency();
  515. #ifndef NDEBUG
  516. fprintf(stdout, "Creating a control flow graph. \n");
  517. #endif
  518. /* create the control flow graph */
  519. graph = createFlowGraph(program->instructions);
  520. checkConsistency();
  521. #ifndef NDEBUG
  522. assert(program != NULL);
  523. assert(program->sy_table != NULL);
  524. assert(file_infos != NULL);
  525. assert(file_infos->syTable_output != NULL);
  526. printSymbolTable(program->sy_table, file_infos->syTable_output);
  527. printGraphInfos(graph, file_infos->cfg_1, 0);
  528. fprintf(stdout, "Updating the basic blocks. \n");
  529. #endif
  530. /* update the control flow graph by inserting load and stores inside
  531. * every basic block */
  532. graph = insertLoadAndStoreInstr(program, graph);
  533. #ifndef NDEBUG
  534. fprintf(stdout, "Executing a liveness analysis on the intermediate code \n");
  535. #endif
  536. performLivenessAnalysis(graph);
  537. checkConsistency();
  538. #ifndef NDEBUG
  539. printGraphInfos(graph, file_infos->cfg_2, 1);
  540. #endif
  541. #ifndef NDEBUG
  542. fprintf(stdout, "Starting the register allocation process. \n");
  543. #endif
  544. /* initialize the register allocator by using the control flow
  545. * informations stored into the control flow graph */
  546. RA = initializeRegAlloc(graph);
  547. /* execute the linear scan algorythm */
  548. execute_linear_scan(RA);
  549. #ifndef NDEBUG
  550. printRegAllocInfos(RA, file_infos->reg_alloc_output);
  551. #endif
  552. #ifndef NDEBUG
  553. fprintf(stdout, "Updating the control flow informations. \n");
  554. #endif
  555. /* apply changes to the program informations by using the informations
  556. * of the register allocation process */
  557. updateProgramInfos(program, graph, RA);
  558. #ifndef NDEBUG
  559. fprintf(stdout, "Writing the assembly file... \n");
  560. #endif
  561. writeAssembly(program, file_infos->output_file_name);
  562. #ifndef NDEBUG
  563. fprintf(stdout, "Assembly written on file \"%s\".\n", file_infos->output_file_name);
  564. #endif
  565. /* shutdown the compiler */
  566. shutdownCompiler(0);
  567. return 0;
  568. }
  569. /*=========================================================================
  570. YYERROR
  571. =========================================================================*/
  572. int yyerror(const char* errmsg)
  573. {
  574. errorcode = AXE_SYNTAX_ERROR;
  575. return 0;
  576. }