I am trying to write a C-minus compiler, and I am at the stage where I have to build an abstract syntax tree and a symbol table. I also need to handle errors; is there any chance I can have error handling without the abstract syntax tree? This is my output in the terminal:
syntan.y: In function ‘yyparse’:
syntan.y:70:64: warning: assignment to ‘ASTnode *’ {aka ‘struct ASTnodetype *’} from incompatible pointer type ‘int *’ [-Wincompatible-pointer-types]
70 | program: declaration_list { prog = $1; }
| ^
syntan.y:73:76: error: request for member ‘left’ in something not a structure or union
73 | declaration_list: declaration_list declaration { $1 -> left = $2; $$ = $1; }
| ^~
syntan.y:86:36: error: ‘VOIDDEC’ undeclared (first use in this function)
86 | if ($1 != VOIDDEC && $1 != INTDEC)
| ^~~~~~~
syntan.y:86:36: note: each undeclared identifier is reported only once for each function it appears in
Yacc file
%{
/* C prologue: copied verbatim into the generated parser ahead of yyparse(). */
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* NOTE(review): y.tab.h declares YYSTYPE, whose `node` member is an
 * ASTnode *; it is included here before asttree.h, which is presumably
 * where ASTnode is declared -- TODO confirm the include order. */
#include "y.tab.h"
#include "asttree.h"
#include "symboltable.h"
void yyerror(const char *s);
extern int yylex(void);
extern int yyparse(void);
extern char *yytext;
extern FILE *yyin;
char filename[50];
/* Bookkeeping shared by the semantic actions below. */
/* level: nesting depth; incremented on LCURLY and decremented at the end of compound_stmt */
static int level = 0;
/* offset: running slot counter; advanced after each variable, parameter or temporary is inserted */
static int offset = 0;
/* goffset: copy of offset saved while a function is being parsed, restored afterwards */
static int goffset = 0;
/* maxoffset: largest offset reached; stored in the FUNCTIONDEC node and reset per function */
static int maxoffset = 0;
/* NOTE(review): repeats the yyerror prototype already given above. */
void yyerror(const char* s);
extern int yylineno;
%}
/* Semantic value types carried on the parser stack. */
%union
{
/* value of a T_NUM token */
int intv;
/* text of a T_ID token */
char *string;
/* AST subtree produced by a grammar rule */
ASTnode *node;
/* operator or type code produced by relop/addop/mulop/type_specifier */
enum OPERATORS op;
};
%start program
%token<intv>T_NUM
%token IF WHILE ELSE
%token RETURN INT VOID
%token GREATEQ LESS SMALLEQ EQUALITY NOTEQ BIGGER
/* NOTE(review): QUESTIONM is used wherever a statement terminator is expected -- presumably ';'; confirm in the lexer. */
%token TIMES DIVIDE PLUS MINUS EQUAL QUESTIONM COMMA
%token LBRAC RBRAC LPAR RPAR LCURLY RCURLY
%token<string>T_ID
/* Precedence, lowest first. */
%left PLUS MINUS
%left TIMES DIVIDE
%left LPAR RPAR
%right ELSE
/* Every statement/expression nonterminal yields an AST node... */
%type<node> declaration_list declaration var_declaration
%type<node> program
%type<node> fun_declaration additive_expression
%type<node> params param_list param
%type<node> compound_stmt local_declarations statement_list statement
%type<node> expression_stmt selection_stmt iteration_stmt return_stmt
%type<node> expression var simple_expression term factor call args arg_list
/* ...while operator and type rules yield an enum OPERATORS code. */
%type<op> relop addop mulop type_specifier
%%
/* Root rule: hands the finished declaration list to `prog`.
 * NOTE(review): prog is not declared in this file -- presumably it comes
 * from asttree.h; confirm. */
program: declaration_list { prog = $1; }
;
/* One or more top-level declarations, linked through ->left.
 * NOTE(review): each reduction assigns $1->left directly, replacing
 * whatever $1->left held before; with three or more declarations only the
 * first and the most recent stay linked. ASTattachleft($1, $2) would
 * append to the tail instead. */
declaration_list: declaration_list declaration { $1 -> left = $2; $$ = $1; }
| declaration { $$ = $1; }
;
/* A declaration is either a variable or a function; the child node is passed through. */
declaration: var_declaration { $$ = $1; }
| fun_declaration { $$ = $1; }
;
/* A scalar (first alternative) or fixed-size array (second alternative)
 * declaration. Both build a VARDEC node, register the name with Insert()
 * and advance `offset` by the number of slots used (1, or the array size).
 * NOTE(review): Insert()'s argument order is presumably
 * (name, type, IsAFunc, level, size, offset, fparms) -- confirm against
 * symboltable.h. */
var_declaration: type_specifier T_ID QUESTIONM
{
/* report, but do not abort on, a name Search() already returns an entry for */
if(Search($2, level, 0))
yyerror("Redefined variable declaration");
if ($1 != VOIDDEC && $1 != INTDEC)
yyerror("Invalid type specifier");
$$ = ASTCreateNode(VARDEC);
$$ -> name = $2;
/* we use the op to determine its type while printing*/
$$ -> op = $1;
$$ -> symbol = Insert($2, $1, 0, level, 1, offset, NULL);
$$ -> isType = $1;
offset += 1;
/* keep the high-water mark used for the enclosing function */
if(offset > maxoffset)
maxoffset = offset;
}
| type_specifier T_ID LBRAC T_NUM RBRAC QUESTIONM
{
if(Search($2, level, 0))
yyerror("Redefined variable declaration");
$$ = ASTCreateNode(VARDEC);
$$ -> name = $2;
$$ -> op = $1;
/* value holds the declared array size; ASTprint prints it when > 0 */
$$ -> value = $4;
/* third argument 2 marks the symbol as an array (see the IsAFunc checks in `var`) */
$$ -> symbol = Insert($2, $1, 2, level, $4, offset, NULL);
$$ -> isType = $1;
offset += $4;
if (offset > maxoffset)
maxoffset = offset;
}
;
/* Maps the INT / VOID keyword tokens to the INTDEC / VOIDDEC type codes. */
type_specifier: INT { $$ = INTDEC; }
| VOID { $$ = VOIDDEC; }
;
/* Function definition. The two mid-rule actions count as symbols, so the
 * positions are: $1 type_specifier, $2 T_ID, $3 LPAR, $4 first action,
 * $5 params, $6 second action, $7 RPAR, $8 compound_stmt. */
fun_declaration: type_specifier T_ID LPAR
{
/* Register the function before its parameters and body are parsed. */
if(Search($2, level, 0))
yyerror("Redefined function declaration");
Insert($2, $1, 1, level, 1, 0, NULL);
/* remember the enclosing offset and start the function's own numbering at 2 */
goffset = offset;
offset = 2;
if(offset > maxoffset)
maxoffset = offset;
}
params
{
/* attach the formal parameter list to the symbol entry created above */
(Search($2, 0, 0)) -> fparms = $5;
}
RPAR compound_stmt
{
$$ = ASTCreateNode(FUNCTIONDEC);
$$ -> name = $2;
/* we use the op to determine its type while printing*/
$$ -> op = $1;
/* s1 links to the params which can be void
or a paramList */
$$ -> s1 = $5;
/* right links to the compund statement,
called a BLOCK in the enumerated type */
$$ -> right = $8;
/*get the symbtab entry we made earlier*/
$$ -> symbol = Search($2, 0, 0);
/*Remove symbols put in, in the function call*/
offset -= Delete(1);
level = 0;
$$ -> value = maxoffset;
//we change this in the symbol table because it is not used
//anywhere else for functions. We have access to this
//in calls, so we can use it to determine where the
//stack pointer is going to have to be
$$ -> symbol -> mysize = maxoffset;
/*change the offset back to the global offset*/
offset = goffset;
maxoffset = 0;
}
;
/* Formal parameters: a parameter list, or VOID, which is represented as NULL. */
params: param_list { $$ = $1; }
| VOID { $$ = NULL; }
;
/* Comma-separated formal parameters, chained through ->left in source
 * order; the value of the rule is the head (first parameter). */
param_list: param_list COMMA param
    {
        /* Append at the tail of the chain. Assigning $1->left directly
           would replace the existing link and drop every parameter
           between the first and the newest one. */
        ASTattachleft($1, $3);
        $$ = $1;
    }
    | param { $$ = $1; }
    ;
/* One formal parameter: scalar (first alternative) or array reference
 * (second alternative). Each builds a PARAM node and inserts the name one
 * level below the current one, using a single slot.
 * NOTE(review): the redefinition check calls Search() with `level` while
 * Insert() uses level+1 -- depending on Search()'s level semantics a
 * duplicate parameter name may go undetected; confirm. */
param: type_specifier T_ID
{
if(Search($2, level, 0))
yyerror("Redefined variable declaration");
$$ = ASTCreateNode(PARAM);
$$ -> name = $2;
/* we use the op to determine its type while printing*/
$$ -> op = $1;
/* if value is 0 it is not an array, used for printing */
$$ -> value = 0;
/*inherit the type*/
$$ -> isType = $1;
$$ -> symbol = Insert($2, $1, 0, level+1, 1, offset, NULL);
offset += 1;
if(offset > maxoffset)
maxoffset = offset;
}
| type_specifier T_ID LBRAC RBRAC
{
if(Search($2, level, 0))
yyerror("Redefined variable declaration");
$$ = ASTCreateNode(PARAM);
$$ -> name = $2;
/* we use the op to determine its type while printing*/
$$ -> op = $1;
/* there was an array param */
$$ -> value = 1;
/*inherit the type*/
$$ -> isType = $1;
/*2 is used for IsAFunc to show its an array ref*/
$$ -> symbol = Insert($2, $1, 2, level+1, 1, offset, NULL);
offset += 1;
if(offset > maxoffset)
maxoffset = offset;
}
;
/* A braced block. The mid-rule action `{ level++; }` occupies $2, so
 * $3 is local_declarations and $4 is statement_list. */
compound_stmt: LCURLY
{ level++; }
local_declarations statement_list RCURLY
{
$$ = ASTCreateNode(BLOCK);
/* with no declarations the block body is just the statements;
otherwise the statements are appended after the last declaration */
if( $3 == NULL )
$$ -> right = $4;
else {
ASTattachleft($3, $4);
$$ -> right = $3;
}
/* leave the scope: drop this level's symbols and give their slots back
(presumably Delete() returns the number of slots released -- confirm) */
offset -= Delete(level);
level--;
}
;
/* Zero or more variable declarations at the top of a block, chained
 * through ->left in source order; NULL when there are none. */
local_declarations: local_declarations var_declaration
    {
        if ($1 == NULL)
            $$ = $2;            /* first declaration becomes the head */
        else
        {
            /* Append at the tail. Assigning $1->left directly would
               replace the existing link and lose every declaration
               between the first and the newest one. */
            ASTattachleft($1, $2);
            $$ = $1;
        }
    }
    | /* empty */ { $$ = NULL; }
    ;
/* Zero or more statements, chained through ->left in source order; NULL
 * when there are none. */
statement_list: statement_list statement
    {
        if ($1 == NULL)
            $$ = $2;            /* first statement becomes the head */
        else
        {
            /* Append at the tail. Assigning $1->left directly would
               replace the existing link, so earlier statements would be
               lost, and an empty statement (NULL) would cut the list. */
            ASTattachleft($1, $2);
            $$ = $1;
        }
    }
    | /* empty */ { $$ = NULL; }
    ;
/* Any statement form; the child node is passed through unchanged. */
statement: expression_stmt { $$ = $1; }
| compound_stmt { $$ = $1; }
| selection_stmt { $$ = $1; }
| iteration_stmt { $$ = $1; }
| return_stmt { $$ = $1; }
;
/* An expression used as a statement is wrapped in an EXPRSTMT node that
 * carries the expression in ->right and copies its type. A bare
 * terminator yields NULL. */
expression_stmt: expression QUESTIONM
{
$$ = ASTCreateNode(EXPRSTMT);
$$ -> right = $1;
$$ -> isType = $1 -> isType;
}
| QUESTIONM { $$ = NULL; }
;
/* if / if-else: ->right is the condition, ->s1 the then-branch and ->s2
 * the else-branch (left NULL when there is no ELSE). */
selection_stmt: IF LPAR expression RPAR statement
{
$$ = ASTCreateNode(IFSTMT);
$$ -> right = $3;
$$ -> s1 = $5;
}
| IF LPAR expression RPAR statement ELSE statement
{
$$ = ASTCreateNode(IFSTMT);
$$ -> right = $3;
$$ -> s1 = $5;
$$ -> s2 = $7;
}
;
/* while loop: ->right is the condition and ->s1 the loop body. */
iteration_stmt: WHILE LPAR expression RPAR statement
{
$$ = ASTCreateNode(ITERSTMT);
$$ -> right = $3;
$$ -> s1 = $5;
}
;
/* return with or without a value; the returned expression, when present,
 * is stored in ->s2 (not ->right). */
return_stmt: RETURN QUESTIONM { $$ = ASTCreateNode(RETURNSTMT); }
| RETURN expression QUESTIONM
{
$$ = ASTCreateNode(RETURNSTMT);
$$ -> s2 = $2;
}
;
/* Assignment, or any simple expression. An assignment reports an error
 * when the two sides differ in type or the target is void, then builds an
 * ASSIGN node and inserts a fresh temporary (from CreateTemp()) into the
 * symbol table, using one slot. */
expression: var EQUAL expression
{
if (($1 -> isType != $3 -> isType) || ($1 -> isType == VOIDDEC))
yyerror("Incompatible types in assignment");
$$ = ASTCreateNode(ASSIGN);
/* hold the link to the var node*/
$$ -> right = $1;
/* hold the link to the expression statement*/
$$ -> s1 = $3;
/*inherit the type, already check for equivalence so can just use $1*/
$$ -> isType = $1 -> isType;
$$ -> name = CreateTemp();
$$ -> symbol = Insert($$ -> name, $$ -> isType, 0, level, 1, offset, NULL);
offset += 1;
if(offset > maxoffset)
maxoffset = offset;
}
| simple_expression { $$ = $1; }
;
/* A variable reference: a plain identifier, or an indexed array element
 * whose index expression is kept in ->right.
 *
 * An IDENT node is produced even when the symbol lookup fails. Parent
 * rules read $n->isType unconditionally, so leaving $$ unassigned on the
 * error path would make them dereference an unset semantic value. On
 * failure the node keeps the defaults set by ASTCreateNode() (symbol
 * NULL, default isType) after the error has been reported. */
var: T_ID
    {
        struct SymbTab *p = Search($1, level, 1);

        $$ = ASTCreateNode(IDENT);
        $$->name = $1;
        if (p == NULL)
            yyerror("Undefined variable");
        else
        {
            $$->symbol = p;
            /* give the node its type, based on the symbol table entry */
            $$->isType = p->Type;
            /* IsAFunc == 2 marks an array, which needs an index */
            if (p->IsAFunc == 2)
                yyerror("Variable is an array");
        }
    }
    | T_ID LBRAC expression RBRAC
    {
        struct SymbTab *p = Search($1, level, 1);

        $$ = ASTCreateNode(IDENT);
        $$->name = $1;
        /* hold the expression inside of the array reference */
        $$->right = $3;
        if (p == NULL)
            yyerror("Undefined variable");
        else
        {
            $$->symbol = p;
            $$->isType = p->Type;
            if (p->IsAFunc != 2)
                yyerror("Variable is not an array");
        }
    }
    ;
/* Optional relational comparison of two additive expressions. The
 * comparison builds an EXPR node (operands in ->left / ->right, operator
 * in ->op) and allocates a temporary for its result. */
simple_expression: additive_expression relop additive_expression
{
if (($1 -> isType != $3 -> isType) || ($1 -> isType == VOIDDEC))
yyerror("Type mismatch or void in simpleExpression");
$$ = ASTCreateNode(EXPR);
$$ -> op = $2;
$$ -> left = $1;
$$ -> right = $3;
/*inherit the type, already check for equivalence so can just use $1*/
$$ -> isType = $1 -> isType;
$$ -> name = CreateTemp();
$$ -> symbol = Insert($$ -> name, $$ -> isType, 0, level, 1, offset, NULL);
offset += 1;
if(offset > maxoffset)
maxoffset = offset;
}
| additive_expression { $$ = $1; }
;
/* Relational operators. Note that the EQUALITY token is mapped to the
 * EQUAL code, which is the case ASTprint renders as "==". */
relop: BIGGER { $$ = BIGGER; }
| LESS { $$ = LESS; }
| SMALLEQ { $$ = SMALLEQ; }
| EQUALITY { $$ = EQUAL; }
| NOTEQ { $$ = NOTEQ; }
| GREATEQ { $$ = GREATEQ; }
;
/* Left-associative chain of + / - over terms. Each operation builds an
 * EXPR node (operands in ->left / ->right, operator in ->op) and
 * allocates a temporary for its result. */
additive_expression: additive_expression addop term
{
if (($1 -> isType != $3 -> isType) || ($1 -> isType == VOIDDEC))
yyerror("Type mismatch or void in additive exp");
$$ = ASTCreateNode(EXPR);
$$ -> op = $2;
$$ -> left = $1;
$$ -> right = $3;
/*inherit the type, already check for equivalence so can just use $1*/
$$ -> isType = $1 -> isType;
$$ -> name = CreateTemp();
$$ -> symbol = Insert($$ -> name, $$ -> isType, 0, level, 1, offset, NULL);
offset += 1;
if(offset > maxoffset)
maxoffset = offset;
}
| term { $$ = $1; }
;
/* Additive operators, passed up as their operator code. */
addop: PLUS { $$ = PLUS; }
| MINUS { $$ = MINUS; }
;
/* Left-associative chain of * and / over factors. Each operation builds
 * an EXPR node (operands in ->left / ->right, operator in ->op) and
 * allocates a temporary for its result. */
term: term mulop factor
{
if (($1 -> isType != $3 -> isType) || ($1 -> isType == VOIDDEC))
yyerror("Type mismatch or void in term/factor exp");
$$ = ASTCreateNode(EXPR);
$$ -> op = $2;
$$ -> left = $1;
$$ -> right = $3;
/*inherit the type, already check for equivalence so can just use $1*/
$$ -> isType = $1 -> isType;
$$ -> name = CreateTemp();
$$ -> symbol = Insert($$ -> name, $$ -> isType, 0, level, 1, offset, NULL);
offset += 1;
if(offset > maxoffset)
maxoffset = offset;
}
| factor { $$ = $1; }
;
/* Multiplicative operators, passed up as their operator code. */
mulop: TIMES { $$ = TIMES; }
| DIVIDE { $$ = DIVIDE; }
;
/* Primary expressions: a parenthesised expression, a variable, a call, or
 * an integer literal (which becomes a NUMBER node typed INTDEC). */
factor: LPAR expression RPAR { $$ = $2; }
| var { $$ = $1; }
| call { $$ = $1; }
| T_NUM
{
$$ = ASTCreateNode(NUMBER);
$$ -> value = $1;
/*numbers are always ints here*/
$$ -> isType = INTDEC;
}
;
/* Function call. The callee is looked up at level 0; the argument list
 * (possibly NULL) is kept in ->right.
 *
 * A CALLSTMT node is produced even when the callee is unknown. Parent
 * rules read $n->isType unconditionally, so leaving $$ unassigned on the
 * error path would make them dereference an unset semantic value. On
 * failure the node keeps the defaults set by ASTCreateNode() (symbol
 * NULL, default isType) after the error has been reported. */
call: T_ID LPAR args RPAR
    {
        struct SymbTab *p = Search($1, 0, 1);

        $$ = ASTCreateNode(CALLSTMT);
        /* hold the link to args in right */
        $$->right = $3;
        $$->name = $1;
        if (p == NULL)
            yyerror("Call to undefined function");
        else
        {
            /* make sure the symbol is a function */
            if (p->IsAFunc != 1)
                yyerror("Is a variable, but was called as function");
            /* actual arguments must match the declared formals */
            if (compareFormals(p->fparms, $3) != 1)
                yyerror("Wrong type of arguments");
            $$->symbol = p;
            $$->isType = p->Type;
        }
    }
    ;
/* Actual arguments of a call: an argument list, or nothing (NULL). */
args: arg_list { $$ = $1; }
| { $$ = NULL; }
;
/* Comma-separated call arguments. Every argument gets its own ARGLIST
 * node with the expression in ->right and the next argument in ->left,
 * in source order. This is the shape compareFormals() walks
 * (q->right->isType, then q->left) and ASTprint() prints. */
arg_list: arg_list COMMA expression
    {
        /* Wrap the new argument the same way as the first one and append
           it to the tail. Storing the previous list in ->right and the
           expression in ->left would make ->right an ARGLIST instead of
           an expression for every call with more than one argument. */
        ASTnode *arg = ASTCreateNode(ARGLIST);
        arg->right = $3;
        ASTattachleft($1, arg);
        $$ = $1;
    }
    | expression
    {
        $$ = ASTCreateNode(ARGLIST);
        $$->right = $1;
    }
    ;
%%
**AST file**
#include "asttree.h"
#include <stdio.h>
#include <stdlib.h>
/* Incremented by ASTprint() each time a VARDEC node is printed. */
int lineCounter = 0;
/* NOTE(review): not referenced in the visible part of this file --
 * presumably feeds label generation elsewhere; confirm. */
int labelCounter = 0;
/* NOTE(review): not referenced in the visible part of this file --
 * presumably backs CreateTemp(); confirm. */
int tempCounter = 0;
/*
 * ASTCreateNode
 * Allocate a new AST node of kind `mytype` with every link (left, right,
 * s1, s2), name, str and symbol set to NULL, value set to 0 and isType
 * set to the `null` type code.
 *
 * Returns the new node; the caller owns it. The function never returns
 * NULL: if the allocation fails it prints a message to stderr and
 * terminates the program, since no caller checks the result.
 */
ASTnode *ASTCreateNode(enum ASTtype mytype)
{
    ASTnode *p = malloc(sizeof *p);

    if (p == NULL)
    {
        fprintf(stderr, "ASTCreateNode: out of memory\n");
        exit(EXIT_FAILURE);
    }

    p->type = mytype;
    p->left = NULL;
    p->right = NULL;
    p->s1 = NULL;
    p->s2 = NULL;
    p->value = 0;
    p->name = NULL;
    p->str = NULL;
    p->symbol = NULL;
    p->isType = null;
    return p;
}
/*
 * ASTattachleft
 * Follow the ->left links starting at p until the last node of the chain
 * and hang q off that node's ->left. p must not be NULL.
 */
void ASTattachleft(ASTnode* p,ASTnode* q)
{
    ASTnode *tail;

    /* walk to the node whose left link is still free */
    for (tail = p; tail->left != NULL; tail = tail->left)
        ;
    tail->left = q;
}
/*
 * PT -- "print tabbing"
 * Write `howmany` indentation units to stdout; nothing is written when
 * howmany is zero or negative.
 */
void PT(int howmany)
{
    int remaining = howmany;

    while (remaining > 0)
    {
        putchar(' ');
        remaining--;
    }
}
/*
 * ASTprint
 * Print the tree rooted at p to stdout, one node per line, indented by
 * `level` units (see PT). Children are printed one or two levels deeper.
 * For every node kind except EXPR the ->left link is treated as "next
 * sibling" and printed afterwards at the same level; EXPR uses ->left as
 * its first operand instead.
 *
 * level: indentation depth for p itself.
 * p:     node to print; NULL prints nothing.
 *
 * Side effect: increments the global lineCounter for every VARDEC node.
 */
void ASTprint(int level, ASTnode* p)
{
    if (p == NULL)
        return;

    PT(level);
    switch (p->type)
    {
    case VARDEC:
        printf("Variable ");
        if (p->op == INTDEC)
            printf("INT ");
        if (p->op == VOIDDEC)
            printf("VOID ");
        printf("%s\n", p->name);
        if (p->value > 0)
        {
            /* value holds the declared size, so this is an array */
            printf("[%d]\n", p->value);
        }
        lineCounter++;
        break;

    case FUNCTIONDEC:
        if (p->op == INTDEC)
            printf("INT ");
        if (p->op == VOIDDEC)
            printf("VOID ");
        printf("FUNCTION %s \n", p->name);
        if (p->s1 == NULL)
        {
            /* no parameter list: declared as (void) */
            PT(level + 2);
            printf("(VOID)\n");
        }
        else
        {
            PT(level + 2);
            printf("( \n");
            ASTprint(level + 2, p->s1);
            PT(level + 2);
            printf(") \n");
        }
        ASTprint(level + 2, p->right);   /* function body */
        break;

    case EXPR:
        printf("EXPR ");
        if (p->name != NULL)
            printf(" %s = ", p->name);   /* temporary holding the result */
        switch (p->op)
        {
        case PLUS:
            printf("+");
            break;
        case MINUS:
            printf("-");
            break;
        case TIMES:
            printf("*");
            break;
        case DIVIDE:
            printf("/");
            break;
        case SMALLEQ:
            printf("<=");
            break;
        case LESS:
            printf("<");
            break;
        case BIGGER:
            printf(">");
            break;
        case GREATEQ:
            printf(">=");
            break;
        case NOTEQ:
            printf("!=");
            break;
        case EQUAL:
            printf("==");
            break;
        case INTDEC:
        case VOIDDEC:
        case null:
            /* type codes are not operators: nothing to print */
            break;
        default:
            break;
        }
        printf("\n");
        ASTprint(level + 1, p->left);
        ASTprint(level + 1, p->right);
        break;

    case RETURNSTMT:
        printf("Return statement\n");
        /* the grammar stores the returned expression in s2, not right */
        ASTprint(level + 1, p->s2);
        break;

    case IDENT:
        printf("IDENTIFIER %s\n", p->name);
        if (p->right != NULL)
        {
            /* right holds the index expression of an array reference */
            PT(level);
            printf("Array reference [\n");
            ASTprint(level + 1, p->right);
            PT(level);
            printf("] end array\n");
        }
        break;

    case BLOCK:
        printf("compound_stmt\n");
        ASTprint(level + 1, p->right);
        break;

    case ASSIGN:
        printf("Assignment STATEMENT\n");
        ASTprint(level + 1, p->right);   /* target variable */
        ASTprint(level + 1, p->s1);      /* assigned expression */
        break;

    case NUMBER:
        printf("NUMBER with value %d\n", p->value);
        break;

    case ITERSTMT:
        printf("WHILE STATEMENT\n");
        ASTprint(level + 1, p->right);   /* condition */
        printf("\n");
        ASTprint(level + 1, p->s1);      /* body */
        printf("\n");
        break;

    case PARAM:
        printf("PARAMETER ");
        printf(" %s ", p->name);
        /* the grammar sets value to 0 for a scalar and non-zero for an
           array parameter */
        if (p->value != 0)
            printf("[]");
        printf("\n");
        break;

    case IFSTMT:
        printf("IF STATEMENT\n");
        ASTprint(level + 1, p->right);   /* condition */
        printf("\n");
        ASTprint(level + 1, p->s1);      /* then-branch */
        printf("\n");
        if (p->s2 != NULL)
        {
            PT(level);
            printf("ELSE \n");
            ASTprint(level + 2, p->s2);
        }
        break;

    case CALLSTMT:
        printf("Function Call %s\n", p->name);
        if (p->right != NULL)
        {
            ASTprint(level + 2, p->right);   /* argument list */
            printf("\n");
        }
        else
        {
            PT(level + 2);
            printf("(VOID)\n");
        }
        break;

    case ARGLIST:
        printf("ARG\n");
        ASTprint(level + 1, p->right);
        break;

    case EXPRSTMT:
        printf("Expression Statement\n");
        ASTprint(level + 1, p->right);
        break;

    default:
        printf("Unknown type in ASTprint\n");
        break;
    }

    /* EXPR already printed left as an operand; for everything else left
       is the next node in the list */
    if (p->type != EXPR)
        ASTprint(level, p->left);
}
/*
 * COMPAREFORMALS
 * Check the arguments of a call against the callee's declared parameters.
 *
 * p: head of the formal parameter chain (linked through ->left), or NULL
 *    for a function without parameters.
 * q: head of the argument chain (linked through ->left, each node holding
 *    its expression in ->right), or NULL for a call without arguments.
 *
 * Returns 1 when both chains have the same length and every argument
 * expression has the same isType as the matching parameter, 0 otherwise.
 * The previous recursive version dropped the result of the recursive call
 * and fell off the end of the function, so a match with at least one
 * parameter returned an indeterminate value.
 */
int compareFormals(ASTnode * p, ASTnode *q)
{
    while (p != NULL && q != NULL)
    {
        /* an argument node without an expression cannot match anything */
        if (q->right == NULL || p->isType != q->right->isType)
            return 0;
        p = p->left;
        q = q->left;
    }
    /* equal only if both chains ran out together */
    return (p == NULL && q == NULL);
}
Any suggestions would be helpful!
Despite the hundreds of lines of code pasted into the question, I'm not sure that there is really enough to go on, particularly since you neither include all of the error messages you received nor do you include your header files. So this is going to be more of a guess than anything else. It's usually a good idea to try compiling your code before you write so much of it, particularly if you're going to be asked to provide a minimal reproducible example. But that's all a digression.
What I noticed in your grammar file, right at the beginning, are your includes:
#include "y.tab.h"
#include "asttree.h"
#include "symboltable.h"
The first one is wrong. That's the header file generated by Bison, and Bison is going to include everything that's in that file into the generated C file. So you don't need the `#include "y.tab.h"`, and moreover, it's probably the cause of most of your errors, because it's being included too early. The problem is that `ASTnode` is (I assume) defined in `asttree.h`, and it's clear from the above snippet that `asttree.h` has not yet been included when `y.tab.h` is processed. That can't work, because the semantic type is declared in `y.tab.h`; the declaration will be something like:
union YYSTYPE
{
int intv;
char *string;
ASTnode *node;
enum OPERATORS op;
};
which requires `ASTnode` to have already been declared. But it hasn't been, so I'm pretty sure that will generate an error, even if you didn't remember to compile with `-Wall` (which you should do; Bison doesn't produce code which generates warnings). Since `ASTnode` hasn't been declared yet at that point and the compiler wants to be able to continue trying to compile, it will have to make `node` be a pointer to something; apparently, it used `int`. That means that later on when you try to use a variable whose type is supposedly an `ASTnode*`, the compiler acts as though that variable were an `int*`, and of course that makes it impossible to refer to the member objects pointed to by that variable, because `int` is not a union or structure, as the error message says. Furthermore, you can't assign the (supposed) `int*` to `prog` because (I guess) `prog` was declared in a part of the program in which `ASTnode`'s declaration is visible, so it is actually a pointer to an `ASTnode`.
When you get a huge number of error messages from a compilation, it's often a good idea to start fixing things at the beginning; at some point, the compiler's error recovery is going to start being unuseful and the later error messages may simply be artefacts. You will soon be able to see how this works as you try to implement error recovery in your own compiler.
The bottom line is that you should just remove `#include "y.tab.h"` from your parser file. It's only needed in the lexer. I doubt whether that is the only error, but it might help you advance a bit with the project.