I am trying to build a 3 address code generator which would produce:
input:x=a+3*(b/7)
output: t1=b/7
t2=3*t1
t3=a+t2
x=t3
NO matter whatever i give as input the output is "syntax error".
I'm using Windows 10.
%{
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define YYDEBUG 1
int yylex(void);
int t_count = 1;
void yyerror(char *s)
{
fprintf(stderr,"%s\n",s);
return;
}
char * generateToken(int i)
{
char* ch=(char*)malloc(sizeof(char)*5);
sprintf(ch,"t%d",i++);
return ch;
}
%}
%union { double dval; char ivar[50]; }
%token <ivar> NUMBER
%token <ivar> NAME
%type <ivar> expr
%type <ivar> term
%left '+' '-'
%left '*' '/'
%left '(' ')'
%right '='
%%
program:
line {
}
| program line {
}
;
line:
expr '\n' {
t_count =1;
}
| NAME '=' expr '\n' {
printf("%s = %s", $3,$1);
t_count=1;
}
;
expr:
expr '+' expr {
strcpy($$,generateToken(t_count));
printf("%s = %s + %s",$$,$1,$3);
}
| expr '-' expr {
strcpy($$,generateToken(t_count));
printf("%s = %s - %s",$$,$1,$3);
}
| expr '*' expr {
strcpy($$,generateToken(t_count));
printf("%s = %s * %s",$$,$1,$3);
}
| expr '/' expr {
strcpy($$,generateToken(t_count));
printf("%s = %s / %s",$$,$1,$3);
}
| term {
strcpy($$, $1);
}
| '(' expr ')' {
strcpy($$,generateToken(t_count));
printf("%s =( %s )" ,$$,$2);
}
;
term:
NAME {
strcpy($$, $1);
}
| NUMBER {
strcpy($$, $1);
}
;
%%
int main(void)
{
if (getenv("YYDEBUG")) yydebug = 1;
yyparse();
return 0;
}
%option noyywrap
%{
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "threeAdd.tab.h"
void yyerror(char*);
extern YYSTYPE yylval;
%}
NAME [a-zA-Z]
DIGIT [0-9]+
NUMBER [-]?{DIGIT}+(\.{DIGIT}+)?
%%
[ \t]+ { }
{NUMBER}
{
strcpy(yylval.ivar,yytext);
return *yylval.ivar;
}
"+" {
return *yytext;
}
"-" {
return *yytext;
}
"*" {
return *yytext;
}
"/" {
return *yytext;
}
"=" {
return *yytext;
}
"(" {
return *yytext;
}
")" {
return *yytext;
}
{NAME} {
strcpy(yylval.ivar,yytext);
return *yylval.ivar;
}
"\n" {
return *yytext;
}
exit {
return 0;
}
. {
char msg[25];
sprintf(msg," <%s>","invalid character",yytext);
yyerror(msg);
}
%%
C:\Users\USER\OneDrive\Desktop\Compiler\ICG>flex file.l
C:\Users\USER\OneDrive\Desktop\Compiler\ICG>bison -d file.y
C:\Users\USER\OneDrive\Desktop\Compiler\ICG>gcc lex.yy.c file.tab.c -o ICG.exe
C:\Users\USER\OneDrive\Desktop\Compiler\ICG>ICG.exe
3+9
syntax error
The basic problem is that you are use double-quote ("
-- strings) for tokens in your yacc file (without defining any codes for them, so they're useless), and returning single character tokens in your lex file. As a result, none of the tokens will be recognized in your parser.
Replace all the "
characters with '
characters on all the single character tokens in your yacc file (so "+"
becomes '+'
and "\n"
becomes '\n'
).
Once you fix that, you have another problem: your lex rules for {DIGITS}+
and {NAME}
don't return a token, so the token will be ignored (leading to syntax errors)
For debugging parser problems in general, it is often worth compiling with -DYYDEBUG
and sticking yydebug = 1;
into main before calling yyparse, which will cause the parser to print a trace of tokens seen and states visited. I often put
if (getenv("YYDEBUG")) yydebug = 1;
into main and just leave it there -- that way normally debugging won't be enabled, but if you set the environment variable YYDEBUG=1
before running your program, you'll see the debug trace (no need to recompile)
In order to return a token, your lexer rule needs to return
the token. So your lexer rule for NUMBER
should be:
{NUMBER} {
strcpy(yylval.ivar,yytext);
return NUMBER;
}
and similar for NAME
. Note that the opening {
of the code block must be on the same line as the pattern -- if it is on a separate line it will not be associated with the pattern.