def.h
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
typedef enum
{
NPROGRAM,
NVARDECLLIST,
NFUNCDECLLIST,
NVARDECL,
NIDLIST,
NOPTPARAMLIST,
NSTATLIST,
NASSIGNSTAT,
NWHILESTAT,
NIFSTAT,
NFORSTAT,
NRELEXPR,
NRETURNSTAT,
NREADSTAT,
NLOGICEXPR,
NWRITESTAT,
NNEGEXPR,
NFUNCCALL,
NEXPRLIST,
NCONDEXPR,
NPARAMDECL,
NMATHEXPR,
NCASTING
} Nonterminal;
typedef enum
{
T_BREAK,
T_TYPE,
T_BOOLEAN,
T_INTCONST,
T_REALCONST,
T_BOOLCONST,
T_WRITEOP,
T_STRCONST,
T_ID,
T_NONTERMINAL
} Typenode;
typedef union
{
int ival;
float rval;
char *sval;
enum {FALSE, TRUE} bval;
} Value;
typedef struct snode
{
Typenode type;
Value value;
struct snode *p1, *p2, *p3;
} Node;
typedef Node *Pnode;
char *newstring(char*);
int yylex();
Pnode nontermnode(Nonterminal, int),
ntn(Nonterminal),
idnode(),
keynode(Typenode, int),
intconstnode(),
realconstnode(),
strconstnode(),
boolconstnode(),
newnode(Typenode);
void yyerror(),
treeprint(Pnode, int);
lexer.lex
%{
#include "parser.h"
#include "def.h"
int line = 1;
Value lexval;
%}
%option noyywrap
spacing ([ \t])+
commento "#"(.)*\n
letter [A-Za-z]
digit [09]
intconst {digit}+
strconst \"([^\"])*\"
boolconst false|true
realconst {intconst}\.{digit}+
id {letter}({letter}|{digit})*
sugar [ \( \) : , ; \. \+ \- \* / ]
%%
{spacing} ;
\n {line++;}
integer {return(INTEGER);}
string {return(STRING);}
boolean {return(BOOLEAN);}
real {return(REAL);}
void {return(VOID);}
func {return(FUNC);}
body {return(BODY);}
end {return(END);}
else {return(ELSE);}
while {return(WHILE);}
do {return(DO);}
for {return(FOR);}
to {return(TO);}
return {return(RETURN);}
read {return(READ);}
write {return(WRITE);}
writeln {return(WRITELN);}
and {return(AND);}
or {return(OR);}
not {return(NOT);}
if {return(IF);}
then {return(THEN);}
break {return(BREAK);}
"<=" {return(LEQ);}
">=" {return(GEQ);}
"!=" {return(NEQ);}
"==" {return(EQU);}
"<" {return(LT);}
">" {return(GT);}
{intconst} {lexval.ival = atoi(yytext); return(INTCONST);}
{strconst} {lexval.sval = newstring(yytext); return(STRCONST);}
{boolconst} {lexval.bval = (yytext[0] == 'f' ? FALSE : TRUE); return(BOOLCONST);}
{realconst} {lexval.rval = atof(yytext); return(REALCONST);}
{id} {lexval.sval = newstring(yytext); return(ID);}
{sugar} {return(yytext[0]);}
. {return(ERROR);}
%%
char *newstring(char *s)
{
char *p;
p = malloc(strlen(s)+1);
strcpy(p, s);
return(p);
}
makefile
bup: lexer.o parser.o tree.o
cc -g -o bup lexer.o parser.o tree.o
lexer.o: lexer.c parser.h def.h
cc -g -c lexer.c
parser.o: parser.c def.h
cc -g -c parser.c
tree.o: tree.c def.h
cc -g -c tree.c
lexer.c: lexer.lex parser.y parser.h parser.c def.h
flex -o lexer.c lexer.lex
parser.h: parser.y def.h
bison -vd -o parser.c parser.y
parser.y
%{
#include "def.h"
#define YYSTYPE Pnode
extern char *yytext;
extern Value lexval;
extern int line;
extern FILE *yyin;
Pnode root = NULL;
%}
%token ID FUNC BODY END BREAK IF THEN ELSE TYPE WHILE DO FOR RETURN READ WRITE WRITELN
%token AND OR INTCONST REALCONST BOOLCONST STRCONST INTEGER REAL NOT STRING BOOLEAN VOID PLUS MINUS TIMES SLASH
%token LEQ GEQ NEQ EQU GT LT TO ERROR
%%
program : var-decl-list func-decl-list body '.' {root = $$ = ntn(NPROGRAM);
root->p1 = ntn(NVARDECLLIST);
root->p2 = ntn(NFUNCDECLLIST);
root->p1->p1 = $1;
root->p2->p1 = $2;
root->p3 = $3;}
;
var-decl-list : var-decl var-decl-list {$$ -> p1=$1;
$1->p3=$2;}
| {$$ = NULL;}
;
var-decl : id-list ':' type ';' {$$ = ntn(NVARDECL);
$$ -> p1 = ntn(NIDLIST);
$$->p1->p1=$1; $$ -> p1 -> p3 = $3;}
;
id-list : ID {$$ = idnode();} ',' id-list {$$ = $2;
$2 -> p3 = $4;}
| ID {$$ = idnode();}
;
type : INTEGER {$$ = keynode(T_TYPE, INTEGER);}
| REAL {$$ = keynode(T_TYPE, REAL);}
| STRING {$$ = keynode(T_TYPE, STRING);}
| BOOLEAN {$$ = keynode(T_TYPE, BOOLEAN);}
| VOID {$$ = keynode(T_TYPE, VOID); }
;
func-decl-list : func-decl func-decl-list {$$ -> p1 = $1;
$1 -> p3 = $2;}
| {$$ = NULL;}
;
func-decl : FUNC ID {$$ = idnode();} '(' opt-param-list ')' ':' type var-decl-list body ';' {$$ -> p1 = $3;
$$ -> p2 = ntn(NOPTPARAMLIST);
$$ -> p2 ->p1=$5;
$$ -> p2 -> p3 = $8;
$$ -> p2 -> p3->p3 = ntn(NVARDECLLIST);
$$ -> p2 -> p3->p3->p1 = $9;
$$ -> p2 -> p3->p3->p3 = $10;}
;
opt-param-list : param-list {$$ = $1;}
| {$$ = NULL;}
;
param-list : param-decl ',' param-list {$$ = $1;
$1 -> p3 = $3;}
| param-decl
;
param-decl : ID {$$ = idnode();} ':' type {$$=ntn(NPARAMDECL);
$$ -> p1 = $2;
$$ -> p2 = $4;}
;
body : BODY stat-list END {$$ = ntn(NSTATLIST);
$$->p1=$2;}
;
stat-list : stat ';' stat-list {$$ = $1;
$1 -> p3 = $3;}
| stat ';' {$$=$1;}
;
stat : assign-stat
| if-stat
| while-stat
| for-stat
| return-stat
| read-stat
| write-stat
| func-call
| BREAK {$$ = newnode(T_BREAK);}
;
assign-stat : ID {$$ = idnode();} '=' expr {$$ = ntn(NASSIGNSTAT);
$$ -> p1 = $2;
$$ -> p2 = $4;}
;
if-stat : IF expr THEN stat-list opt-else-stat END {$$ = ntn(NIFSTAT);
$$ -> p1 = $2;
$$ -> p2 = ntn(NSTATLIST);
$$ ->p2 -> p3 = $5;}
;
opt-else-stat : ELSE stat-list {$$ = ntn(NSTATLIST);
$$->p1=$2;}
| {$$ = NULL;}
;
while-stat : WHILE expr DO stat-list END {$$ = ntn(NWHILESTAT);
$$->p1=$2;
$$->p2=ntn(NSTATLIST);
$$->p2->p1=$4;}
;
for-stat : FOR ID {$$=idnode();} '=' expr TO expr DO stat-list END {$$ = ntn(NFORSTAT);
$$->p1=$3;
$$->p2=$5;
$$->p2->p3=$7;
$$->p2->p3->p3=ntn(NSTATLIST);
$$->p2->p3->p3->p1=$9;}
;
return-stat : RETURN opt-expr {$$ = ntn(NRETURNSTAT);
$$->p1=$2;}
;
opt-expr : expr {$$=$1;}
| {$$=NULL;}
;
read-stat : READ '(' id-list ')' {$$ = ntn(NREADSTAT);
$$->p1=ntn(NIDLIST);
$$->p1->p1=$3;}
;
write-stat : write-op '(' expr-list ')' {$$ = ntn(NWRITESTAT);
$$->p1=$1;
$$->p2=ntn(NEXPRLIST);
$$->p2->p1=$3;}
;
write-op : WRITE {$$ = keynode(T_WRITEOP, WRITE);}
| WRITELN {$$ = keynode(T_WRITEOP, WRITELN);}
;
expr-list : expr ',' expr-list {$$=$1;
$1->p3=$3;}
| expr
;
expr : expr logic-op bool-term { $$=$2;
$2->p1=$1;
$2->p2=$3;}
| bool-term
;
logic-op : AND {$$=nontermnode(NLOGICEXPR, AND);}
| OR {$$=nontermnode(NLOGICEXPR, OR);}
;
bool-term : rel-term rel-op rel-term {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| rel-term
;
rel-op : EQU {$$=nontermnode(NRELEXPR, EQU);}
| NEQ {$$=nontermnode(NRELEXPR, NEQ);}
| GT {$$=nontermnode(NRELEXPR, GT);}
| GEQ {$$=nontermnode(NRELEXPR, GEQ);}
| LT {$$=nontermnode(NRELEXPR, LT);}
| LEQ {$$=nontermnode(NRELEXPR, LEQ);}
;
rel-term : rel-term low-prec-op low-term {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| low-term
;
low-prec-op : PLUS {$$=nontermnode(NMATHEXPR, PLUS);}
| MINUS {$$=nontermnode(NMATHEXPR, MINUS);}
;
low-term : low-term high-prec-op factor {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| factor
;
high-prec-op : TIMES {$$=nontermnode(NMATHEXPR, TIMES);}
| SLASH {$$=nontermnode(NMATHEXPR, SLASH);}
;
factor : unary-op factor {$$=$1;
$1->p3=$2;}
| '(' expr ')' {$$=$2;}
| ID {$$=idnode();}
| const {$$=$1;}
| func-call {$$=$1;}
| cond-expr {$$=$1;}
| cast '(' expr ')' {$$=$1;
$1->p3=$3;}
;
unary-op : MINUS {$$=nontermnode(NNEGEXPR, MINUS);}
| NOT {$$=nontermnode(NNEGEXPR, NOT);}
;
const : INTCONST {$$=intconstnode();}
| REALCONST {$$=realconstnode();}
| STRCONST {$$=strconstnode();}
| BOOLCONST {$$=boolconstnode();}
;
func-call : ID {$$=idnode();} '(' opt-expr-list ')' {$$ = ntn(NFUNCCALL);
$$->p1=$2; $$->p2=$4;}
;
opt-expr-list : expr-list {$$=ntn(NEXPRLIST); $$->p1=$1;}
| {$$=NULL;}
;
cond-expr : IF expr THEN expr ELSE expr END {$$=ntn(NCONDEXPR);
$$->p1=$2;
$$->p2=$4;
$$->p3=$6;}
;
cast : INTEGER {$$=nontermnode(NCASTING,INTEGER);}
| REAL {$$=nontermnode(NCASTING, REAL);}
;
%%
Pnode ntn(Nonterminal nonterm)
{
Pnode p = newnode(T_NONTERMINAL);
p->value.rval = nonterm;
return(p);
}
Pnode nontermnode(Nonterminal nonterm, int valore)
{
Pnode p = newnode(T_NONTERMINAL);
p->value.rval = nonterm;
p->value.ival = valore;
return(p);
}
Pnode idnode()
{
Pnode p = newnode(T_ID);
p->value.sval = lexval.sval;
return(p);
}
Pnode keynode(Typenode keyword, int valore)
{
Pnode p = newnode(keyword);
p->value.ival = valore;
return p;
}
Pnode intconstnode()
{
Pnode p = newnode(T_INTCONST);
p->value.ival = lexval.ival;
return(p);
}
Pnode realconstnode()
{
Pnode p = newnode(T_REALCONST);
p->value.rval = lexval.rval;
return(p);
}
Pnode strconstnode()
{
Pnode p = newnode(T_STRCONST);
p->value.sval = lexval.sval;
return(p);
}
Pnode boolconstnode()
{
Pnode p = newnode(T_BOOLCONST);
p->value.bval = lexval.bval;
return(p);
}
Pnode newnode(Typenode tnode)
{
Pnode p = malloc(sizeof(Node));
p->type = tnode;
p->p1 = p->p2 = p->p3 = NULL;
return(p);
}
int main()
{
int result;
printf("----------------------------------------------");
yyin = stdin;
if((result = yyparse()) == 0)
treeprint(root, 0);
return(result);
}
void yyerror()
{
fprintf(stderr, "Line %d: syntax error on symbol \"%s\"\n",
line, yytext);
exit(-1);
}
tree.c
#include "def.h"
char* tabtypes[] =
{
"T_BREAK",
"T_TYPE",
"T_BOOLEAN",
"T_INTCONST",
"T_REALCONST",
"T_BOOLCONST",
"T_WRITEOP",
"T_STRCONST",
"T_ID",
"T_NONTERMINAL"
};
char* tabnonterm[] =
{
"PROGRAM",
"NVARDECLLIST",
"NFUNCDECLLIST",
"NVARDECL",
"NIDLIST",
"NOPTPARAMLIST",
"NSTATLIST",
"NASSIGNSTAT",
"NWHILESTAT",
"NIFSTAT",
"NFORSTAT",
"NRELEXPR",
"NRETURNSTAT",
"NREADSTAT",
"NLOGICEXPR",
"NWRITESTAT",
"NNEGEXPR",
"NFUNCCALL",
"NEXPRLIST",
"NCONDEXPR",
"NPARAMDECL",
"NMATHEXPR",
"NCASTING"
};
void treeprint(Pnode root, int indent)
{
int i;
Pnode p;
for(i=0; i<indent; i++)
printf(" ");
printf("%s", (root->type == T_NONTERMINAL ? tabnonterm[root->value.ival] : tabtypes[root->type]));
if(root->type == T_ID || root->type == T_STRCONST)
printf(" (%s)", root->value.sval);
else if(root->type == T_INTCONST)
printf(" (%d)", root->value.ival);
else if(root->type == T_BOOLCONST)
printf(" (%s)", (root->value.ival == TRUE ? "true" : "false"));
printf("\n");
for(p=root->p1; p != NULL; p = p->p3)
treeprint(p, indent+1);
}
prog ( File with example of grammar)
numero: integer;
func fattoriale(n: integer): integer
fact: integer;
body
if n == 0 then
fact = 1;
else
fact = n * fattoriale(n1);
end;
return fact;
end;
func stampaFattoriali(tot: integer): void
i, f: integer;
body
for i=0 to tot do
f = fattoriale(i);
writeln("Il fattoriale di ", i, "è ", f);
end;
end;
body
read(numero);
if numero < 0 then
writeln("Il numero ", numero, "non è valido");
else
stampaFattoriali(numero);
end.
When i type make on terminal , it create the files : tree.o parser.o parser.h parser.c lexer.c lexer.o bup.
When i execute the bup file , the terminal show me this error message :
"ine 1: syntax error on symbol "
So it don't generate the abstract tree.
I don't know if this error refers to the prog or lexer.lex or parse.y file.
Yacc/bison assign their own numbers to terminal tokens, and assume that the lexer will use those numbers. But you provide your own numbers in the def.h
header, which yacc/bison knows absolutely nothing about. It will not correctly interpret the codes returned by yylex
which will make it impossible to parse correctly.
So don't do that.
Let bison generate the token codes, use the header file it generates (parser.h
with your settings), and don't step on its feet by trying to define the enum values yourself.
As a hint about debugging, that is really way too much code to have written before you start debugging, and that fact is exactly illustrated by your complaint at the end of your question that you don't know where to look for the error. Instead of writing the whole project and then hoping it works as a whole, write little pieces and debug them as you go. Although you need to parser to generate the token type values, you don't need to run the parser to test your scanner. You can write a simple program which repeatedly calls yylex
and prints the returned types and values. (Or you can just enable flex debugging with the -d
command line option, which is even simpler.)
Similarly, you should be able to test your AST methods by writing some test functions which use these methods to build, walk and print out an AST. Make sure that they produce the expected results.
Only once you have evidence that the lexer is producing the correct tokens and that your AST construction functions work should you start to debug your parser. Again, you will find it much easier if you use the built-in debugging facilities; see the Debugging your parser section of the Bison manual for instructions.
Good luck.