Acse.y 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. %{
  2. /*
  3. * Andrea Di Biagio
  4. * Politecnico di Milano, 2007
  5. *
  6. * Acse.y
  7. * Formal Languages & Compilers Machine, 2007/2008
  8. *
  9. */
  10. /*************************************************************************
  11. Compiler for the language LANCE
  12. ***************************************************************************/
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <assert.h>
  16. #include "axe_struct.h"
  17. #include "axe_engine.h"
  18. #include "symbol_table.h"
  19. #include "axe_errors.h"
  20. #include "collections.h"
  21. #include "axe_expressions.h"
  22. #include "axe_gencode.h"
  23. #include "axe_utils.h"
  24. #include "axe_array.h"
  25. #include "axe_cflow_graph.h"
  26. #include "cflow_constants.h"
  27. #include "axe_transform.h"
  28. #include "axe_reg_alloc.h"
  29. #include "reg_alloc_constants.h"
  30. #include "axe_io_manager.h"
  31. #ifndef NDEBUG
  32. # include "axe_debug.h"
  33. #endif
  34. /* global variables */
  35. int line_num; /* this variable will keep track of the
  36. * source code line number. Every time that a newline
  37. * is encountered while parsing the input file, this
  38. * value is increased by 1. This value is then used
  39. * for error tracking: if the parser returns an error
  40. * or a warning, this value is used in order to notify
  41. * in which line of code the error has been found */
  42. int num_error; /* the number of errors found in the code. This value
  43. * is increased by 1 every time a new error is found
  44. * in the code. */
  45. int num_warning; /* As for the `num_error' global variable, this one
  46. * keeps track of all the warning messages displayed */
  47. /* errorcode is defined inside "axe_engine.c" */
  48. extern int errorcode; /* this variable is used to test if an error is found
  49. * while parsing the input file. It also is set
  50. * to notify if the compiler internal state is invalid.
  51. * When the parsing process is started, the value
  52. * of `errorcode' is set to the value of the macro
  53. * `AXE_OK' defined in "axe_constants.h".
  54. * As long as everything (the parsed source code and
  55. * the internal state of the compiler) is correct,
  56. * the value of `errorcode' is set to `AXE_OK'.
  57. * When an error occurs (because the input file contains
  58. * one or more syntax errors or because something went
  59. * wrong in the machine internal state), the errorcode
  60. * is set to a value that is different from `AXE_OK'. */
  61. extern int cflow_errorcode; /* As for `errorcode' this value is used to
  62. * test if an error occurs during the creation process of
  63. * a control flow graph. More informations can be found
  64. * analyzing the file `axe_cflow_graph.h'. */
  65. /* program informations */
  66. t_program_infos *program; /* The singleton instance of `program'.
  67. * An instance of `t_program_infos' holds in its
  68. * internal structure, all the useful informations
  69. * about a program. For example: the assembly
  70. * (code and directives); the symbol table;
  71. * the label manager (see axe_labels.h) etc. */
  72. t_cflow_Graph *graph; /* An instance of a control flow graph. This instance
  73. * will be generated starting from `program' and will
  74. * be used during the register allocation process */
  75. t_reg_allocator *RA; /* Register allocator. It implements the "Linear scan"
  76. * algorythm */
  77. t_io_infos *file_infos; /* input and output files used by the compiler */
  78. %}
  79. %expect 1
  80. /*=========================================================================
  81. SEMANTIC RECORDS
  82. =========================================================================*/
  83. %union {
  84. int intval;
  85. char *svalue;
  86. t_axe_expression expr;
  87. t_axe_declaration *decl;
  88. t_list *list;
  89. t_axe_label *label;
  90. t_while_statement while_stmt;
  91. }
  92. /*=========================================================================
  93. TOKENS
  94. =========================================================================*/
  95. %start program
  96. %token LBRACE RBRACE LPAR RPAR LSQUARE RSQUARE
  97. %token SEMI COLON PLUS MINUS MUL_OP DIV_OP MOD_OP
  98. %token AND_OP OR_OP NOT_OP
  99. %token ASSIGN LT GT SHL_OP SHR_OP EQ NOTEQ LTEQ GTEQ
  100. %token ANDAND OROR
  101. %token COMMA
  102. %token FOR
  103. %token RETURN
  104. %token READ
  105. %token WRITE
  106. %token BRANGE
  107. %token <label> DO
  108. %token <while_stmt> WHILE
  109. %token <label> IF
  110. %token <label> ELSE
  111. %token <intval> TYPE
  112. %token <svalue> IDENTIFIER
  113. %token <intval> NUMBER
  114. %type <expr> exp
  115. %type <decl> declaration
  116. %type <list> declaration_list
  117. %type <label> if_stmt
  118. /*=========================================================================
  119. OPERATOR PRECEDENCES
  120. =========================================================================*/
  121. %left COMMA
  122. %left ASSIGN
  123. %left OROR
  124. %left ANDAND
  125. %left OR_OP
  126. %left AND_OP
  127. %left EQ NOTEQ
  128. %left LT GT LTEQ GTEQ
  129. %left SHL_OP SHR_OP
  130. %left MINUS PLUS
  131. %left MUL_OP DIV_OP
  132. %right NOT
  133. /*=========================================================================
  134. BISON GRAMMAR
  135. =========================================================================*/
  136. %%
  137. /* `program' is the starting non-terminal of the grammar.
  138. * A program is composed by:
  139. 1. declarations (zero or more);
  140. 2. A list of instructions. (at least one instruction!).
  141. * When the rule associated with the non-terminal `program' is executed,
  142. * the parser notify it to the `program' singleton instance. */
  143. program : var_declarations statements
  144. {
  145. /* Notify the end of the program. Once called
  146. * the function `set_end_Program' - if necessary -
  147. * introduces a `HALT' instruction into the
  148. * list of instructions. */
  149. set_end_Program(program);
  150. /* return from yyparse() */
  151. YYACCEPT;
  152. }
  153. ;
  154. var_declarations : var_declarations var_declaration { /* does nothing */ }
  155. | /* empty */ { /* does nothing */ }
  156. ;
  157. var_declaration : TYPE declaration_list SEMI
  158. {
  159. /* update the program infos by adding new variables */
  160. set_new_variables(program, $1, $2);
  161. }
  162. ;
  163. declaration_list : declaration_list COMMA declaration
  164. { /* add the new declaration to the list of declarations */
  165. $$ = addElement($1, $3, -1);
  166. }
  167. | declaration
  168. {
  169. /* add the new declaration to the list of declarations */
  170. $$ = addElement(NULL, $1, -1);
  171. }
  172. ;
  173. declaration : IDENTIFIER ASSIGN NUMBER
  174. {
  175. /* create a new instance of t_axe_declaration */
  176. $$ = alloc_declaration($1, 0, 0, $3);
  177. /* test if an `out of memory' occurred */
  178. if ($$ == NULL)
  179. notifyError(AXE_OUT_OF_MEMORY);
  180. }
  181. | IDENTIFIER LSQUARE NUMBER RSQUARE
  182. {
  183. /* create a new instance of t_axe_declaration */
  184. $$ = alloc_declaration($1, 1, $3, 0);
  185. /* test if an `out of memory' occurred */
  186. if ($$ == NULL)
  187. notifyError(AXE_OUT_OF_MEMORY);
  188. }
  189. | IDENTIFIER
  190. {
  191. /* create a new instance of t_axe_declaration */
  192. $$ = alloc_declaration($1, 0, 0, 0);
  193. /* test if an `out of memory' occurred */
  194. if ($$ == NULL)
  195. notifyError(AXE_OUT_OF_MEMORY);
  196. }
  197. ;
  198. /* A block of code can be either a single statement or
  199. * a set of statements enclosed between braces */
  200. code_block : statement { /* does nothing */ }
  201. | LBRACE statements RBRACE { /* does nothing */ }
  202. ;
  203. /* One or more code statements */
  204. statements : statements statement { /* does nothing */ }
  205. | statement { /* does nothing */ }
  206. ;
  207. /* A statement can be either an assignment statement or a control statement
  208. * or a read/write statement or a semicolon */
  209. statement : assign_statement SEMI { /* does nothing */ }
  210. | control_statement { /* does nothing */ }
  211. | read_write_statement SEMI { /* does nothing */ }
  212. | SEMI { gen_nop_instruction(program); }
  213. ;
  214. control_statement : if_statement { /* does nothing */ }
  215. | while_statement { /* does nothing */ }
  216. | do_while_statement SEMI { /* does nothing */ }
  217. | return_statement SEMI { /* does nothing */ }
  218. ;
  219. read_write_statement : read_statement { /* does nothing */ }
  220. | write_statement { /* does nothing */ }
  221. ;
  222. assign_statement : IDENTIFIER LSQUARE exp RSQUARE ASSIGN exp
  223. {
  224. /* Notify to `program' that the value $6
  225. * have to be assigned to the location
  226. * addressed by $1[$3]. Where $1 is obviously
  227. * the array/pointer identifier, $3 is an expression
  228. * that holds an integer value. That value will be
  229. * used as an index for the array $1 */
  230. storeArrayElement(program, $1, $3, $6);
  231. /* free the memory associated with the IDENTIFIER.
  232. * The use of the free instruction is required
  233. * because of the value associated with IDENTIFIER.
  234. * The value of IDENTIFIER is a string created
  235. * by a call to the function `strdup' (see Acse.lex) */
  236. free($1);
  237. }
  238. | IDENTIFIER ASSIGN exp
  239. {
  240. int location;
  241. t_axe_instruction *instr;
  242. /* in order to assign a value to a variable, we have to
  243. * know where the variable is located (i.e. in which register).
  244. * the function `get_symbol_location' is used in order
  245. * to retrieve the register location assigned to
  246. * a given identifier.
  247. * A symbol table keeps track of the location of every
  248. * declared variable.
  249. * `get_symbol_location' perform a query on the symbol table
  250. * in order to discover the correct location of
  251. * the variable with $1 as identifier */
  252. /* get the location of the symbol with the given ID. */
  253. location = get_symbol_location(program, $1, 0);
  254. /* update the value of location */
  255. if ($3.expression_type == IMMEDIATE)
  256. gen_move_immediate(program, location, $3.value);
  257. else
  258. instr = gen_add_instruction
  259. (program, location, REG_0, $3.value, CG_DIRECT_ALL);
  260. /* free the memory associated with the IDENTIFIER */
  261. free($1);
  262. }
  263. ;
  264. if_statement : if_stmt
  265. {
  266. /* fix the `label_else' */
  267. assignLabel(program, $1);
  268. }
  269. | if_stmt ELSE
  270. {
  271. /* reserve a new label that points to the address where to jump if
  272. * `exp' is verified */
  273. $2 = newLabel(program);
  274. /* exit from the if-else */
  275. gen_bt_instruction (program, $2, 0);
  276. /* fix the `label_else' */
  277. assignLabel(program, $1);
  278. }
  279. code_block
  280. {
  281. /* fix the `label_else' */
  282. assignLabel(program, $2);
  283. }
  284. ;
  285. if_stmt : IF
  286. {
  287. /* the label that points to the address where to jump if
  288. * `exp' is not verified */
  289. $1 = newLabel(program);
  290. }
  291. LPAR exp RPAR
  292. {
  293. if ($4.expression_type == IMMEDIATE)
  294. gen_load_immediate(program, $4.value);
  295. else
  296. gen_andb_instruction(program, $4.value,
  297. $4.value, $4.value, CG_DIRECT_ALL);
  298. /* if `exp' returns FALSE, jump to the label $1 */
  299. gen_beq_instruction (program, $1, 0);
  300. }
  301. code_block { $$ = $1; }
  302. ;
  303. while_statement : WHILE
  304. {
  305. /* initialize the value of the non-terminal */
  306. $1 = create_while_statement();
  307. /* reserve and fix a new label */
  308. $1.label_condition
  309. = assignNewLabel(program);
  310. }
  311. LPAR exp RPAR
  312. {
  313. if ($4.expression_type == IMMEDIATE)
  314. gen_load_immediate(program, $4.value);
  315. else
  316. gen_andb_instruction(program, $4.value,
  317. $4.value, $4.value, CG_DIRECT_ALL);
  318. /* reserve a new label. This new label will point
  319. * to the first instruction after the while code
  320. * block */
  321. $1.label_end = newLabel(program);
  322. /* if `exp' returns FALSE, jump to the label $1.label_end */
  323. gen_beq_instruction (program, $1.label_end, 0);
  324. }
  325. code_block
  326. {
  327. /* jump to the beginning of the loop */
  328. gen_bt_instruction
  329. (program, $1.label_condition, 0);
  330. /* fix the label `label_end' */
  331. assignLabel(program, $1.label_end);
  332. }
  333. ;
  334. do_while_statement : DO
  335. {
  336. /* the label that points to the address where to jump if
  337. * `exp' is not verified */
  338. $1 = newLabel(program);
  339. /* fix the label */
  340. assignLabel(program, $1);
  341. }
  342. code_block WHILE LPAR exp RPAR
  343. {
  344. if ($6.expression_type == IMMEDIATE)
  345. gen_load_immediate(program, $6.value);
  346. else
  347. gen_andb_instruction(program, $6.value,
  348. $6.value, $6.value, CG_DIRECT_ALL);
  349. /* if `exp' returns TRUE, jump to the label $1 */
  350. gen_bne_instruction (program, $1, 0);
  351. }
  352. ;
  353. return_statement : RETURN
  354. {
  355. /* insert an HALT instruction */
  356. gen_halt_instruction(program);
  357. }
  358. ;
  359. read_statement : READ LPAR IDENTIFIER RPAR
  360. {
  361. int location;
  362. /* read from standard input an integer value and assign
  363. * it to a variable associated with the given identifier */
  364. /* get the location of the symbol with the given ID */
  365. /* lookup the symbol table and fetch the register location
  366. * associated with the IDENTIFIER $3. */
  367. location = get_symbol_location(program, $3, 0);
  368. /* insert a read instruction */
  369. gen_read_instruction (program, location);
  370. /* free the memory associated with the IDENTIFIER */
  371. free($3);
  372. }
  373. ;
  374. write_statement : WRITE LPAR exp RPAR
  375. {
  376. int location;
  377. if ($3.expression_type == IMMEDIATE)
  378. {
  379. /* load `immediate' into a new register. Returns the new register
  380. * identifier or REG_INVALID if an error occurs */
  381. location = gen_load_immediate(program, $3.value);
  382. }
  383. else
  384. location = $3.value;
  385. /* write to standard output an integer value */
  386. gen_write_instruction (program, location);
  387. }
  388. ;
  389. exp: NUMBER { $$ = create_expression ($1, IMMEDIATE); }
  390. | IDENTIFIER {
  391. int location;
  392. /* get the location of the symbol with the given ID */
  393. location = get_symbol_location(program, $1, 0);
  394. /* return the register location of IDENTIFIER as
  395. * a value for `exp' */
  396. $$ = create_expression (location, REGISTER);
  397. /* free the memory associated with the IDENTIFIER */
  398. free($1);
  399. }
  400. | IDENTIFIER LSQUARE exp RSQUARE {
  401. int reg;
  402. /* load the value IDENTIFIER[exp]
  403. * into `arrayElement' */
  404. reg = loadArrayElement(program, $1, $3);
  405. /* create a new expression */
  406. $$ = create_expression (reg, REGISTER);
  407. /* free the memory associated with the IDENTIFIER */
  408. free($1);
  409. }
  410. | NOT_OP NUMBER { if ($2 == 0)
  411. $$ = create_expression (1, IMMEDIATE);
  412. else
  413. $$ = create_expression (0, IMMEDIATE);
  414. }
  415. | NOT_OP IDENTIFIER {
  416. int identifier_location;
  417. int output_register;
  418. /* get the location of the symbol with the given ID */
  419. identifier_location =
  420. get_symbol_location(program, $2, 0);
  421. /* generate a NOT instruction. In order to do this,
  422. * at first we have to ask for a free register where
  423. * to store the result of the NOT instruction. */
  424. output_register = getNewRegister(program);
  425. /* Now we are able to generate a NOT instruction */
  426. gen_notl_instruction (program, output_register
  427. , identifier_location);
  428. $$ = create_expression (output_register, REGISTER);
  429. /* free the memory associated with the IDENTIFIER */
  430. free($2);
  431. }
  432. | exp AND_OP exp {
  433. $$ = handle_bin_numeric_op(program, $1, $3, ANDB);
  434. }
  435. | exp OR_OP exp {
  436. $$ = handle_bin_numeric_op(program, $1, $3, ORB);
  437. }
  438. | exp PLUS exp {
  439. $$ = handle_bin_numeric_op(program, $1, $3, ADD);
  440. }
  441. | exp MINUS exp {
  442. $$ = handle_bin_numeric_op(program, $1, $3, SUB);
  443. }
  444. | exp MUL_OP exp {
  445. $$ = handle_bin_numeric_op(program, $1, $3, MUL);
  446. }
  447. | exp DIV_OP exp {
  448. $$ = handle_bin_numeric_op(program, $1, $3, DIV);
  449. }
  450. | exp LT exp {
  451. $$ = handle_binary_comparison (program, $1, $3, _LT_);
  452. }
  453. | exp GT exp {
  454. $$ = handle_binary_comparison (program, $1, $3, _GT_);
  455. }
  456. | exp EQ exp {
  457. $$ = handle_binary_comparison (program, $1, $3, _EQ_);
  458. }
  459. | exp NOTEQ exp {
  460. $$ = handle_binary_comparison (program, $1, $3, _NOTEQ_);
  461. }
  462. | exp LTEQ exp {
  463. $$ = handle_binary_comparison (program, $1, $3, _LTEQ_);
  464. }
  465. | exp GTEQ exp {
  466. $$ = handle_binary_comparison (program, $1, $3, _GTEQ_);
  467. }
  468. | exp SHL_OP exp { $$ = handle_bin_numeric_op(program, $1, $3, SHL); }
  469. | exp SHR_OP exp { $$ = handle_bin_numeric_op(program, $1, $3, SHR); }
  470. | exp ANDAND exp { $$ = handle_bin_numeric_op(program, $1, $3, ANDL); }
  471. | exp OROR exp { $$ = handle_bin_numeric_op(program, $1, $3, ORL); }
  472. | LPAR exp RPAR { $$ = $2; }
  473. | MINUS exp {
  474. if ($2.expression_type == IMMEDIATE)
  475. {
  476. $$ = $2;
  477. $$.value = - ($$.value);
  478. }
  479. else
  480. {
  481. t_axe_expression exp_r0;
  482. /* create an expression for regisrer REG_0 */
  483. exp_r0.value = REG_0;
  484. exp_r0.expression_type = REGISTER;
  485. $$ = handle_bin_numeric_op
  486. (program, exp_r0, $2, SUB);
  487. }
  488. }
  489. | BRANGE LPAR exp COMMA exp COMMA exp RPAR {
  490. $$ = handle_brange_op(program, $3, $5, $7);
  491. }
  492. ;
  493. %%
  494. /*=========================================================================
  495. MAIN
  496. =========================================================================*/
  497. int main (int argc, char **argv)
  498. {
  499. /* initialize all the compiler data structures and global variables */
  500. init_compiler(argc, argv);
  501. /* start the parsing procedure */
  502. yyparse();
  503. #ifndef NDEBUG
  504. fprintf(stdout, "Parsing process completed. \n");
  505. #endif
  506. /* test if the parsing process completed succesfully */
  507. checkConsistency();
  508. #ifndef NDEBUG
  509. fprintf(stdout, "Creating a control flow graph. \n");
  510. #endif
  511. /* create the control flow graph */
  512. graph = createFlowGraph(program->instructions);
  513. checkConsistency();
  514. #ifndef NDEBUG
  515. assert(program != NULL);
  516. assert(program->sy_table != NULL);
  517. assert(file_infos != NULL);
  518. assert(file_infos->syTable_output != NULL);
  519. printSymbolTable(program->sy_table, file_infos->syTable_output);
  520. printGraphInfos(graph, file_infos->cfg_1, 0);
  521. fprintf(stdout, "Updating the basic blocks. \n");
  522. #endif
  523. /* update the control flow graph by inserting load and stores inside
  524. * every basic block */
  525. graph = insertLoadAndStoreInstr(program, graph);
  526. #ifndef NDEBUG
  527. fprintf(stdout, "Executing a liveness analysis on the intermediate code \n");
  528. #endif
  529. performLivenessAnalysis(graph);
  530. checkConsistency();
  531. #ifndef NDEBUG
  532. printGraphInfos(graph, file_infos->cfg_2, 1);
  533. #endif
  534. #ifndef NDEBUG
  535. fprintf(stdout, "Starting the register allocation process. \n");
  536. #endif
  537. /* initialize the register allocator by using the control flow
  538. * informations stored into the control flow graph */
  539. RA = initializeRegAlloc(graph);
  540. /* execute the linear scan algorythm */
  541. execute_linear_scan(RA);
  542. #ifndef NDEBUG
  543. printRegAllocInfos(RA, file_infos->reg_alloc_output);
  544. #endif
  545. #ifndef NDEBUG
  546. fprintf(stdout, "Updating the control flow informations. \n");
  547. #endif
  548. /* apply changes to the program informations by using the informations
  549. * of the register allocation process */
  550. updateProgramInfos(program, graph, RA);
  551. #ifndef NDEBUG
  552. fprintf(stdout, "Writing the assembly file... \n");
  553. #endif
  554. writeAssembly(program, file_infos->output_file_name);
  555. #ifndef NDEBUG
  556. fprintf(stdout, "Assembly written on file \"%s\".\n", file_infos->output_file_name);
  557. #endif
  558. /* shutdown the compiler */
  559. shutdownCompiler(0);
  560. return 0;
  561. }
  562. /*=========================================================================
  563. YYERROR
  564. =========================================================================*/
  565. int yyerror(const char* errmsg)
  566. {
  567. errorcode = AXE_SYNTAX_ERROR;
  568. return 0;
  569. }