I'm writing a translator for a university project that should translate given Pascal code into assembler code using flex/bison. I've written a parser and a lexer, which generate a symbol table (at the moment it works correctly only without procedures and functions). My question is: how do I generate assembler code from it and print it to a file?
Here is my lexer:
%{
#include "parser.tab.h"
#include <string.h>
#define YY_FLEX_DEBUG 1
%}
/* Character classes and token patterns used by the rules section. */
letter [a-zA-Z]
digit [0-9]
ID {letter}({letter}|{digit})*
delim [ \t\r\n]
/* Unsigned number: digits, optional fraction, optional exponent.
   The exponent must reference the {digit} definition; a bare (digit)
   would only match the literal text "digit". */
NUM {digit}+(\.{digit}+)?([eE][+\-]?{digit}+)?
ws {delim}+
%%
{ws} { /* skip blanks, tabs and newlines; no token is returned */ }
if { /* keywords are listed before the ID rule, so flex prefers them when both match the same text */ return(IF); }
then {return(THEN); }
else {return(ELSE); }
{NUM} { /* keep a heap copy (strdup) of the literal's text as the token value */ yylval.stringValue = strdup(yytext); return(NUM); }
"<" { /* every relational operator is one RELOP token; its spelling travels in stringValue */ yylval.stringValue = "<"; return(RELOP); }
"<=" {yylval.stringValue = "<="; return(RELOP); }
"=" {yylval.stringValue = "="; return(RELOP); }
">" {yylval.stringValue = ">"; return(RELOP); }
">=" {yylval.stringValue = ">="; return(RELOP); }
"<>" {yylval.stringValue = "<>"; return(RELOP); }
":=" {return(ASSIGNOP); }
do {return(DO); }
program {return(PROGRAM); }
var {return(VAR); }
array {return(ARRAY); }
of {return(OF); }
integer {return(INTEGER); }
real {return(REAL); }
function {return(FUNCTION); }
procedure {return(PROCEDURE); }
begin {return(START); }
end {return(END); }
div { /* multiplicative operators share the MULOP token; stringValue says which one */ yylval.stringValue = "div"; return(MULOP); }
mod {yylval.stringValue = "mod"; return(MULOP); }
and {yylval.stringValue = "and"; return(MULOP); }
"*" {yylval.stringValue = "*"; return(MULOP); }
"/" {yylval.stringValue = "/"; return(MULOP); }
while {return(WHILE); }
or {return(OR); }
"+" { /* '+' and '-' share the SIGN token; stringValue tells them apart */ yylval.stringValue = "+"; return(SIGN); }
"-" {yylval.stringValue = "-"; return(SIGN); }
".." {return(DOUBLEDOT); }
"," { /* punctuation is returned as its own character code */ return *yytext; }
"(" {return *yytext; }
")" {return *yytext; }
"[" {return *yytext; }
"]" {return *yytext; }
";" {return *yytext; }
":" {return *yytext; }
"." {return *yytext; }
not {return(NOT); }
{ID} { /* identifiers: heap copy (strdup) of the matched text */ yylval.stringValue= strdup(yytext); return(ID);}
%%
/*
 * Called by the scanner when it reaches end of input.
 * Returns 1 to tell flex that there is no further input to switch to,
 * so yylex() reports end-of-file to the parser.
 */
int yywrap(void)
{
    return 1;
}
Here is my parser:
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "SymbolTable.h"
/* Number of errors reported so far; incremented when an error is reported
   (e.g. by install() and yyerror()) and reset to 0 in main(). */
int errors;
/* NOTE(review): not referenced anywhere in the visible code - presumably
   meant as a counter for generated labels; confirm before relying on it. */
int lable;
/* Compile Bison's tracing code into the parser; main() enables it via yydebug. */
#define YYDEBUG 1
/*
 * Enter a newly declared identifier into the symbol table.
 *
 * sym_name: identifier text.
 *
 * Returns 1 when the name was added, 0 when it was already present; in the
 * latter case a message is printed and the global error counter is bumped.
 */
int install (char *sym_name)
{
    if (getsym(sym_name) != 0) {
        errors++;
        printf("%s is defined\n", sym_name);
        return 0;
    }
    putsym(sym_name);
    return 1;
}
/*
 * Enter a numeric literal into the symbol table unless an entry with the
 * same text already exists.
 *
 * sym_name: text of the literal as matched by the lexer.
 *
 * Returns 1 when a new entry was created, 0 when one already existed.
 */
int install_num (char *sym_name)
{
    if (getsym(sym_name) != 0) {
        return 0;
    }
    putnum(sym_name);
    return 1;
}
context_check(char *sym_name)
{
if (getsym(sym_name) == 0)
printf("%s is undeclared\n", sym_name);
}
%}
/* Semantic value type shared by tokens and nonterminals. */
%union
{
int intValue;
float floatValue;
char *stringValue;
int adress;
}
%start program
/* ID and NUM carry their source text in stringValue. */
%token <stringValue> ID
%token <stringValue> NUM
%token IF THEN PROGRAM VAR ARRAY
%token OF INTEGER REAL
%token FUNCTION PROCEDURE
%token START END
%token ASSIGNOP
/* The lexer stores the operator's spelling in yylval.stringValue for these
   tokens, so they are typed to make $n usable in actions. */
%token <stringValue> RELOP MULOP
%token ELSE WHILE DO
%token <stringValue> SIGN
%token OR
%token DOUBLEDOT
%token NOT
/* NOTE: the lexer returns '+' and '-' as SIGN and '*' and '/' as MULOP, so
   these character tokens never occur in the grammar and the two lines below
   have no effect. Precedence comes from the layering of expression,
   simple_expression, term and factor. */
%left '-' '+'
%left '*' '/'
%%
/* Grammar for the Pascal subset. The only semantic actions are symbol-table
   calls: install() for declared names, set_type()/set_proc() to record
   types, context_check() for used names and install_num() for literals. */
program: PROGRAM ID '(' prog_list ')' ';' declarations subprogram_declarations compound_statement '.'
;
/* Names in the program header have no action, so they are not entered in the symbol table. */
prog_list: ID
| prog_list ',' ID
;
/* Each listed identifier is installed as soon as it is reduced; its type is filled in later by `type`. */
identifier_list: ID {install($1);}
| identifier_list ',' ID {install($3);}
;
declarations: declarations VAR identifier_list ':' type ';'
| /* empty */
;
/* set_type() assigns the given type name to every table entry that has no type yet. */
type: standart_type
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF REAL {set_type("REALARR");}
| ARRAY '[' NUM DOUBLEDOT NUM ']' OF INTEGER {set_type("INTARR");}
;
standart_type: INTEGER {set_type("INTEGER");}
| REAL {set_type("REAL");}
;
subprogram_declarations: subprogram_declarations subprogram_declaration ';'
| /* empty */
;
subprogram_declaration: subprogram_head declarations compound_statement;
/* These actions run after `arguments` has been reduced, so the parameters
   are installed and typed before the subprogram's own name is installed. */
subprogram_head: FUNCTION ID arguments ':' INTEGER ';' {install($2); set_type("INTEGER");}
| FUNCTION ID arguments ':' REAL ';' {install($2); set_type("REAL");}
| PROCEDURE ID arguments ';' {install($2); set_proc($2);}
;
arguments: '(' parameter_list ')'
| /* empty */;
parameter_list: identifier_list ':' type
| parameter_list ';' identifier_list ':' type
;
/* START and END are the tokens the lexer returns for `begin` and `end`. */
compound_statement: START
optional_statements END
;
optional_statements: statement_list
| /* empty */
;
statement_list: statement
| statement_list ';' statement
;
/* IF always requires an ELSE branch in this grammar. */
statement: variable ASSIGNOP expression
| procedure_statement
| compound_statement
| IF expression THEN statement ELSE statement
| WHILE expression DO statement
;
/* A used variable must already be in the symbol table; otherwise context_check() reports it. */
variable: ID {context_check($1);}
| ID '[' expression ']' {context_check($1);}
;
/* Procedure calls have no action, so the called name is not checked against the symbol table. */
procedure_statement: ID
| ID '(' expression_list ')'
;
expression_list: expression
| expression_list ',' expression
;
/* Operator precedence is expressed by nesting: factor binds tightest, then
   term (MULOP), then simple_expression (SIGN, OR), then RELOP. */
expression: simple_expression
| simple_expression RELOP simple_expression
;
simple_expression: term
| SIGN term
| simple_expression SIGN term
| simple_expression OR term
;
term: factor
| term MULOP factor
;
/* Numeric literals are recorded in the symbol table through install_num(). */
factor: variable
| ID '(' expression_list ')' {context_check($1);}
| NUM {install_num($1);}
| '(' expression ')'
| NOT factor
;
%%
/*
 * Entry point: parse the Pascal source named by the first command-line
 * argument, print the resulting symbol table, and write the assembler
 * prologue and epilogue to "output.asm".
 *
 * Returns 0 when parsing ran to completion, 1 on a usage error or when a
 * file cannot be opened or written.
 *
 * Note: `output` is local to this function, so grammar actions cannot refer
 * to it; a file-scope FILE pointer is needed for code generation there.
 */
int main (int argc, char *argv[]) {
    extern FILE *yyin;
    FILE *output;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                argc > 0 ? argv[0] : "translator");
        return 1;
    }

    /* Open the input first so a bad path does not truncate output.asm. */
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    output = fopen("output.asm", "w");
    if (output == NULL) {
        perror("output.asm");
        fclose(yyin);
        return 1;
    }

    fprintf(output, "\t jump.i #lab0\n");
    yydebug = 1;
    errors = 0;
    yyparse();
    print_sym_table();
    fprintf(output, "\t exit");

    fclose(yyin);
    /* fclose flushes buffered output; a failure here means lost data. */
    if (fclose(output) != 0) {
        perror("output.asm");
        return 1;
    }
    return 0;
}
/*
 * Called by yyparse() when it detects a syntax error.
 *
 * s: message supplied by the parser; it is only read, never modified.
 *
 * Increments the global error counter, prints the message on standard
 * output, and returns 0 (the value is not used by the visible callers).
 */
int yyerror (const char *s)
{
    errors++;
    printf ("%s\n", s);
    return 0;
}
Here is symbol table:
/* One symbol-table entry; entries form a singly linked list. */
struct symrec
{
    char *name;          /* heap copy of the identifier or literal text */
    int addr;            /* NOTE(review): presumably the symbol's address/offset - confirm */
    char *type;          /* heap string such as "INTEGER" or "REAL"; NULL until a type is set */
    struct symrec *next; /* next (older) entry, or null at the end of the list */
};
typedef struct symrec symrec;

/* Head of the symbol table; new entries are pushed at the front. */
symrec *sym_table = (symrec *)0;

/* Full prototypes, so that calls are type-checked against the definitions. */
symrec *putsym(char *sym_name);
symrec *getsym(char *sym_name);
symrec *putnum(char *sym_name);
void set_type(char *type);
void set_proc(char *sym_name);
/* NOTE(review): no definition is visible, so the parameter list is left unspecified. */
void set_func();
void print_sym_table(void);
symrec *putsym(char *sym_name)
{
symrec *ptr;
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
ptr->type = NULL;
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
symrec *putnum(char *sym_name)
{
symrec *ptr;
char *dPos = strchr(sym_name, '.');
char *ePos = strchr(sym_name, 'e');
ptr = (symrec *)malloc(sizeof(symrec));
ptr->name = (char *)malloc(strlen(sym_name) + 1);
if ((dPos == NULL) && (ePos == NULL)){
ptr->type = (char *)malloc(strlen("INTEGER") + 1);
strcpy(ptr->type, "INTEGER");
}
else if ((dPos != NULL) && (ePos == NULL)) {
ptr->type = (char *)malloc(strlen("REAL") + 1);
strcpy(ptr->type, "REAL");
}
else {
ptr->type = (char *)malloc(strlen("FLOAT") + 1);
strcpy(ptr->type, "FLOAT");
}
strcpy(ptr->name,sym_name);
ptr->next = (struct symrec *)sym_table;
sym_table = ptr;
return ptr;
}
void set_type(char *type)
{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next) {
if (ptr->type == NULL) {
ptr->type = (char *)malloc(strlen(type) + 1);
strcpy(ptr->type, type);
}
}
}
void set_proc(char *sym_name) {
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
if (strcmp (ptr->name, sym_name) == 0){
ptr->type = (char *)malloc(strlen("PROC") + 1);
strcpy(ptr->type, "PROC");
}
}
symrec *getsym(char *sym_name)
{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
if (strcmp (ptr->name, sym_name) == 0)
return ptr;
return 0;
}
void print_sym_table()
{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
printf("\n%s %s\n", ptr->name, ptr->type);
}
Simple test file
program example(input, output);
var x, y: integer;
var g,h:real;
begin
g:=x+y;
write(g)
end.
And what it should print to the output file:
jump.i #lab0 ;jump.i lab0
lab0:
add.i 0,4,24 ;add.i x,y,$t0
inttoreal.i 24,28 ;inttoreal.i $t0,$t1
mov.r 28,8 ;mov.r $t1,g
write.r 8 ;write.r g
exit ;exit
comments (;jump.i lab0) are not necessary.
I know how the addresses of variables should be calculated, and I can translate Pascal code to this assembler on paper, but I really don't understand what I should put, and where, in the bison or flex file so that it generates assembler code into the output file. I've tried to generate labels for begin statements in this rule:
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
But it got segmentation fault. It's pretty obvious how to generate labels, but how should I generate
add.i 0, 4, 24
Should I create another parser after I've built symbol table with this one? Or is it doable without additional parser. Need some hints what to do next.
So you've got this bit of code:
compound_statement: START {fprintf(output, "lab0\n");}
optional_statements END
You're on the right track doing it this way, but you get a segmentation fault when you add it in and this is because output
isn't initialised.
I can't see where you've declared the output
that is being referenced there, but it isn't the same one that is declared in main
where you open a file for output.
main (int argc, char *argv[]) {
FILE *output = fopen("output.asm", "w");
That version output
is local to main
and only visible inside that function. If you remove the declaration of output
from main
and leave just the assignment, you'll be assigning the results of fopen
to the globally declared version of output
that your bison code is using.
main (int argc, char *argv[]) {
output = fopen("output.asm", "w");
Not sure why you're having confusion with the other part of your question since you've demonstrated how to do it already in your parser. Take this bit of your parser:
variable: ID {context_check($1);}
It is taking the value of "ID" - the $1
- and passing it to that function. If you wanted "variable" to contain a value you'd store it in $$
. Then when you use "variable" higher up like in here:
statement: variable ASSIGNOP expression
$1
will contain whatever value you put in $$
for "variable". $2
will be the value obtained from "ASSIGNOP" token and $3
will have the results from "expression". And again if you store a value in $$
you'd be able to use it in anything that is expecting a "statement".
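$$, $1 etc. are all of the union type you've created by using %union, so Bison has to know which member each of them uses. Either declare it once for the symbol (for example `%token <stringValue> ID` or `%type <stringValue> variable`), or name the member where you use the value by writing `$<intValue>$` or `$<stringValue>2`, if you need to specifically state which value you're setting.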