I am trying to understand the basic concepts of creating a compiler. I've tried to write a very basic C compiler and I encountered some problems. When I try to print where a syntax error has occurred using the variable yylineno, some of the reported line numbers are wrong. Does anyone know why? I've searched the internet but I can't find a clear answer. Thanks.
comp.y file (the yacc/bison grammar):
%{
/* Prologue: C declarations copied verbatim into the generated parser. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern int yylineno;
extern FILE* yyin;
//extern char* yytext;
/* The generated parser calls yylex(); declare it so the call is not an
   implicit function declaration. */
int yylex(void);
void yyerror(char *s);
%}
/* Semantic value type: its only member is a string. */
%union {char* var;}
/* Terminal symbols. */
%token INT FLOAT CHAR DOUBLE VOID
%token FOR WHILE
%token IF ELSE PRINTF
%token STRUCT
%token NUM
%token INCLUDE
%token DOT
/* ID is the only token with a typed value: the `var` member of the union. */
%token <var> ID
/* Precedence and associativity declarations, lowest precedence first. */
%right '='
%left AND OR
%left '<' '>' LE GE EQ NE LT GT
/* Request detailed "unexpected X, expecting Y" syntax error messages. */
%error-verbose
%%
/* Grammar entry point: one parse accepts either a function definition or a
   single declaration-level statement. */
start: Function
| Declaration
;
/* Declaration block: the statement forms that end in ';' (typed and untyped
   assignments, function calls, array uses, struct statements and a bare
   `Type ID`), plus an error-recovery alternative. */
Declaration: Type Assignment ';'
| Assignment ';'
| FunctionCall ';'
| ArrayUsage ';'
| Type ArrayUsage ';'
| StructStmt ';'
| Arg ';'
/* Error recovery: a syntax error is reduced to a Declaration and yyerrok
   re-enables error reporting right away. */
| error {yyerrok;}
;
/* Assignment block: despite its name this non-terminal also covers
   comma-separated lists, + - * / chains, quoted and parenthesised forms,
   unary minus, and bare NUM / ID operands. The comma and arithmetic forms
   are right-recursive: their left operand is always a single ID or NUM. */
Assignment: ID '=' Assignment
| ID '=' FunctionCall
| ID '=' ArrayUsage
| ArrayUsage '=' Assignment
| ID ',' Assignment
| NUM ',' Assignment
| ID '+' Assignment
| ID '-' Assignment
| ID '*' Assignment
| ID '/' Assignment
| NUM '+' Assignment
| NUM '-' Assignment
| NUM '*' Assignment
| NUM '/' Assignment
| '\'' Assignment '\''
| '(' Assignment ')'
| '-' '(' Assignment ')'
| '-' NUM
| '-' ID
| NUM
| ID
;
/* Function call block: an identifier followed by parentheses that are either
   empty or hold a single Assignment (which itself may be a comma list). */
FunctionCall : ID'('')'
| ID'('Assignment')'
;
/* Array usage: an identifier indexed by an Assignment. The second
   alternative recovers from a syntax error inside the brackets once the
   closing ']' is seen, then re-enables error reporting with yyerrok. */
ArrayUsage : ID'['Assignment']'
| ID'['error ']' {yyerrok;}
;
/* Function block: a function definition is a return type, a name, an
   optional parenthesised parameter list and a compound-statement body. */
Function: Type ID '(' ArgListOpt ')' CompoundStmt
;
/* Optional parameter list; the empty alternative accepts `()`. */
ArgListOpt: ArgList
|
;
/* One or more parameters separated by commas (left-recursive). */
ArgList: ArgList ',' Arg
| Arg
;
/* A single parameter: a type followed by an identifier. */
Arg: Type ID
;
/* Brace-delimited statement list (the first alternative also accepts one
   braced list directly following another). The last two alternatives try to
   catch an unbalanced brace: they report it through yyerror and then raise a
   syntax error with YYERROR.
   NOTE(review): `'{' StmtList` is a prefix of `'{' StmtList '}'`, and the
   last alternative starts with a StmtList that may be empty, so these
   presumably produce parser conflicts -- confirm with the generator's
   conflict report. */
CompoundStmt: CompoundStmt '{' StmtList '}'
| '{' StmtList '}'
| '{' StmtList {yyerror("Missing '}'"); YYERROR;}
| StmtList '}' {yyerror("Missing '{'"); YYERROR;}
;
/* Zero or more statements (left-recursive; the empty alternative ends it). */
StmtList: StmtList Stmt
|
;
/* A single statement: a loop, an if, a print call, a declaration-level
   statement, or an empty statement (a lone ';'). CompoundStmt is not an
   alternative here; the loop rules accept it explicitly as a body. */
Stmt: WhileStmt
| Declaration
| ForStmt
| IfStmt
| PrintFunc
| ';'
;
/* Type identifier block: the built-in type keywords accepted wherever a
   type is required. */
Type: INT
| FLOAT
| CHAR
| DOUBLE
| VOID
;
/* While loop: a parenthesised condition followed by either a single
   statement or a braced compound statement. */
WhileStmt: WHILE '(' Expr ')' Stmt
| WHILE '(' Expr ')' CompoundStmt
;
/* For loop: accepts the three-part header `(Expr ; Expr ; Expr)` as well as
   a header holding a single Expr, each followed by either a single
   statement or a braced compound statement. */
ForStmt: FOR '(' Expr ';' Expr ';' Expr ')' Stmt
| FOR '(' Expr ';' Expr ';' Expr ')' CompoundStmt
| FOR '(' Expr ')' Stmt
| FOR '(' Expr ')' CompoundStmt
;
/* If statement: a parenthesised condition and a single statement body.
   This rule has no alternative using the ELSE token, and it does not accept
   a CompoundStmt body. */
IfStmt : IF '(' Expr ')' Stmt
;
/* Struct statement: `struct` ID followed by braces holding exactly one
   `Type Assignment` (no ';' inside the braces). */
StructStmt : STRUCT ID '{' Type Assignment '}'
;
/* Print function: `printf` with a single parenthesised Expr, terminated by
   ';'. */
PrintFunc : PRINTF '(' Expr ')' ';'
;
/* Expression block: an expression may be empty, a comparison between two
   expressions, an Assignment, or an ArrayUsage. The comparison operators'
   associativity comes from the %left declaration in the declarations
   section.
   NOTE(review): the empty alternative combined with `Expr op Expr` presumably
   makes the grammar ambiguous -- confirm with the generator's conflict
   report. */
Expr:
| Expr LE Expr
| Expr GE Expr
| Expr NE Expr
| Expr EQ Expr
| Expr GT Expr
| Expr LT Expr
| Assignment
| ArrayUsage
;
%%
int count = 0;

/*
 * Program entry point: reads the program to check from standard input and
 * runs the parser repeatedly until end-of-file is reached on yyin.
 * Always returns 0, whatever yyparse() reported.
 */
int main() {
    int slot = 0;

    /* NOTE(review): `variables` is not declared in the code visible here --
       presumably a global array of at least 100 char pointers; confirm. */
    while (slot < 100) {
        variables[slot] = " ";
        slot++;
    }

    yyin = stdin;

    /* Parse at least once, then keep restarting the parser while input
       remains, so a failed parse does not stop the whole run. */
    for (;;) {
        yyparse();
        if (feof(yyin)) {
            break;
        }
    }

    return 0;
}
void yyerror(char* s) {
printf("Error : %s at line %d \n", s, yylineno);
}
comp.l file (the lex/flex scanner):
/* Named character classes reused by the NUM and ID patterns below. */
alpha [a-zA-Z]
digit [0-9]
%{
#include "y.tab.h"
#include <stdio.h>
#include <string.h>
int line_n = 1;
%}
%option nodefault yylineno
%%
%{
/* Scanner rules. Keyword patterns come before the identifier pattern so that
   a keyword wins when both match the same text. The former leading
   `[\t\n]+` rule was dropped: the whitespace rule near the end matches a
   superset of it with the same (empty) action. */
%}
"int" {return INT;}
"float" {return FLOAT;}
"char" {return CHAR;}
"void" {return VOID;}
"double" {return DOUBLE;}
"for" {return FOR;}
"while" {return WHILE;}
"if" {return IF;}
"else" {return ELSE;}
"printf" {return PRINTF;}
"struct" {return STRUCT;}
^"#include ".+ { /* include lines are skipped */ }
{digit}+ {return NUM;}
{alpha}({alpha}|{digit})* { /* the identifier text is duplicated with strdup */ yylval.var = strdup(yytext); return ID; }
"<=" {return LE;}
">=" {return GE;}
"==" {return EQ;}
"!=" {return NE;}
">" {return GT;}
"<" {return LT;}
"." {return DOT;}
"//".* { /* line comment: skipped up to the end of the line */ }
"/*"([^*]|\*+[^*/])*\*+"/" { /* block comment: ends at the first closing delimiter, so code between two comments is no longer swallowed */ }
[ \t\r\n]+ { /* whitespace is skipped */ }
. {return *yytext;}
%%
/* End-of-input hook for the scanner: always returns 1. */
int yywrap(void)
{
    return 1;
}
For example, when I try to check the following code:
1.int main(){
2. int a
3.
4.
5.
6. int o
7.}
I get :
Error : syntax error, unexpected INT, expecting ';' at line 6
Error : syntax error, unexpected '}', expecting ';' at line 7
In the C code example the line numbers of the errors are correct. There is no requirement for the missing `;` after `int a` on line 2 to be on that line and not on, say, line 4, 5, or even line 6 as `;int o`.
The `int o` on line 6 also has a missing `;` which is only revealed when `}` is met on line 7.
Neither of those missing `;` characters is required to be on the same line as its definition, so the first line number where it might have appeared is not reported. Instead, the last line number where it must have appeared is reported.
Your example pasted into a MSVC program reports the same pattern of line numbers for the errors.