I have a file that will be read in by my parser:
test.txt
BEGINING.
XXX XY-1.
XXXX Y.
XXXX Z.
BODY.
PRINT "Please enter a number?".
END.
The error is occurring around the PRINT
token. In my lexical analyser, the string "Please enter a number?"
is getting processed correctly, I know this because I outputted it to the console.
lexer.I
%{
#include <stdio.h>
#include "parser.tab.h"
#define YYSTYPE char *
%}
%option noyywrap
%option caseless
%%
(?i:BEGINING) return TERM_BEGINING;
(?i:BODY) return TERM_BODY;
(?i:END) return TERM_END;
(?i:MOVE) return TERM_MOVE;
(?i:TO) return TERM_TO;
(?i:ADD) return TERM_ADD;
(?i:INPUT) return TERM_INPUT;
(?i:PRINT) return TERM_PRINT;
[\t| ]+ // ignore whitespaces
\n // Ignore new line
[X]+ yylval.integer = yyleng; return TERM_SIZE;
[\"][^"]*[\"] yylval.string = strdup(yytext); printf("%s\n", yytext); return TERM_STR;
";" return TERM_SEPARATOR;
"." return TERM_FULLSTOP;
[0-9]+ yylval.integer = atoi(yytext); return TERM_INT;
[\_\-0-9][a-zA-Z][a-zA-Z0-9\-\_]* yylval.string = strdup(yytext); return TERM_INVALID_VARIABLE_NAME;
[A-Z][A-Z0-9\-]* yylval.string = strdup(yytext); return TERM_VARIABLE_NAME;
. return TERM_INVALID_TOKEN;
%%
//int main(int argc, char** argv) {
// if (argc != 2) {
// yyin = stdin;
// yylex();
// } else {
// FILE* aFile = fopen(argv[1], "r");
// if (!aFile) {
// printf("Cannot open file!\n");
// return -1;
// } else {
// yyin = aFile;
// yylex();
// fclose(yyin);
// }
// }
//}
Now in the parser, for debugging purposes, when the PRINT
keywor is returned from the lexical analyser I have decided to print out the word PRINT
parser.y
%{
//yylval - externalise variable in which yylex should place semantic value associated with a token
//yyparse - parser function produced by bison, call this to start parsing
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
extern FILE* yyin;
typedef int bool;
#define true 1
#define false 0
//Cause an immediate syntax error
int yyerror(char *s) {
fprintf(stderr, "%s\n", s);
exit(0);
}
//Declare yylex, user supplied lexical analyzer function to get the next token
int yylex(void);
//yywrap text wrap
//When scanner recieves EOF it checks yywrap()
int yywrap(void) {
return 1;
}
// Structure of variable
struct variable {
char* varName;
int varSize;
int varValue;
};
typedef struct {
struct variable* array;
size_t used;
size_t size;
} Array;
// Method declarations
void writeToWarning(char *aWarning);
void writeError(char *anError);
void writeToLog(char *alog);
bool checkIfDeclared(char* varName);
void initialiseArray(Array *a, size_t initialSize);
void insertArray(Array *a, struct variable *aVar);
void freeArray(Array *a);
void createVar(int size, char* varName);
void inputValues(char* varName);
%}
//each token is a terminal
%token TERM_BEGINING TERM_BODY TERM_END TERM_MOVE
TERM_TO TERM_ADD TERM_INPUT TERM_PRINT
TERM_SIZE TERM_STR TERM_SEPARATOR TERM_FULLSTOP
TERM_INT TERM_INVALID_VARIABLE_NAME TERM_VARIABLE_NAME TERM_INVALID_TOKEN
//Everything that something can be
//yyvval array from lexer.l
%union {
int integer;
char* string;
}
//Define anything that the lexer can return as a string or integer
%type<integer> TERM_INT
%type<integer> TERM_SIZE
%type<string> TERM_STR
%type<string> TERM_SEPARATOR
%type<string> TERM_FULLSTOP
%type<string> TERM_VARIABLE_NAME
%type<string> TERM_INVALID_VARIABLE_NAME
%%
// Create grammer
program:
/* empty */ |
begin middle_declarations body grammar_s end {
printf("Parsing complete\n");
exit(0);
};
begin:
TERM_BEGINING TERM_FULLSTOP;
body:
TERM_BODY TERM_FULLSTOP;
end:
TERM_END TERM_FULLSTOP;
middle_declarations:
/* empty */ |
//Left recursive to allow for many declearations
middle_declarations declaration TERM_FULLSTOP;
declaration:
TERM_SIZE TERM_VARIABLE_NAME {
createVar($1, $2);
};
grammar_s:
/* empty */ |
grammar_s grammar TERM_FULLSTOP;
grammar:
add | move | print | input;
add:
TERM_ADD TERM_VARIABLE_NAME TERM_TO TERM_VARIABLE_NAME {
//addVarToVar($2, $4);
}
TERM_ADD TERM_INT TERM_TO TERM_VARIABLE_NAME {
//addNumToVar($2, $4);
}
;
move:
TERM_MOVE TERM_VARIABLE_NAME TERM_TO TERM_VARIABLE_NAME {
//moveVarToVar($2, $4);
}
TERM_MOVE TERM_INT TERM_TO TERM_VARIABLE_NAME {
//moveNumToVar($2, $4);
}
;
print:
TERM_PRINT rest_of_print {
printf("PRINT\n");
};
rest_of_print:
/* empty */ |
rest_of_print other_print;
other_print:
TERM_VARIABLE_NAME {
//printVarName($1);
}
TERM_INVALID_VARIABLE_NAME {
//printInvalidVarName($1);
}
TERM_SEPARATOR {
printf("%s", $1);
}
TERM_STR {
printf("STRING%s%s", $1, "\n");
}
;
input:
// Fullstop declares grammar
TERM_INPUT other_input;
other_input:
/* empty */ |
// Input var1
TERM_VARIABLE_NAME {
inputValues($1);
}
// Can be input var1
TERM_VARIABLE_NAME TERM_SEPARATOR {
//inputValues($2);
}
;
%%
Array a;
void initialiseArray(Array *a, size_t initialSize) {
a->array = (struct variable*) malloc(initialSize * sizeof(struct variable));
a->used = 0;
a->size = initialSize;
}
void insertArray(Array *a, struct variable *aVar) {
if (a->used == a->size) {
a->size *= 6;
a->array = (struct variable*) realloc(a->array, a->size * sizeof(struct variable));
}
a->array[a->used++] = *aVar;
}
void freeArray(Array *a) {
free(a->array);
a->array = NULL;
a->used = a->size = 0;
}
void createVar(int size, char* varName) {
printf("SIZE: %d\n", size);
printf("NAME: %s\n", varName);
bool alreadyDeclared;
alreadyDeclared = checkIfDeclared(varName);
if (!alreadyDeclared) {
struct variable aVar;
aVar.varName = varName;
aVar.varSize = size;
aVar.varValue = 0;
insertArray(&a, &aVar);
writeToLog("Created variable succesfully\n");
} else {
writeToWarning("Variable has already been declared\n");
}
}
bool checkIfDeclared(char* varName) {
for (int i = 0; i < a.used; i++) {
if (strcmp(a.array[i].varName, varName) == 0) {
return 1;
}
}
return 0;
}
void inputValues(char* varName) {
printf("VAR_NAME: %s\n", varName);
}
int main(int argc, char* argv[]) {
if (argc != 2) {
yyin = stdin;
yylex();
} else {
FILE* aFile = fopen(argv[1], "r");
if (!aFile) {
printf("Cannot open file!\n");
return -1;
} else {
int i;
initialiseArray(&a, 5);
yyin = aFile;
yyparse();
}
}
fclose(yyin);
freeArray(&a);
// for (int i = 0; i < 10; i++) {
// struct variable aVar;
// aVar.varName = "TEST_VAR";
// aVar.varSize = 0;
// aVar.varValue = 0;
// insertArray(&a, &aVar);
//}
//for (int j = 0; j < a.used; j++) {
// printf("Name: %s\n", a.array[j].varName);
//}
//printf("Name: %s\n", a.array[9].varName);
//printf("Used: %zu\n", a.used);
//yyparse();
//if (argc > 0) {
// freeArray(&a);
// fclose(yyin);
//}
return 0;
}
void writeToWarning(char *aWarning){
printf("Warning : %s%s", aWarning,"\n");
}
void writeError(char *anError){
printf("Error : %s%s", anError,"\n");
}
void writeToLog(char *alog){
printf("Success : %s%s", alog,"\n");
}
This is how I compile
bison -d parser.y -o parser.tab.c
flex -o lexer.yy.c lexer.I
gcc lexer.yy.c parser.tab.c -o parser
This is how I run
./parser test.txt
Then this is the output that I get
SIZE: 3 // this is output from the createVar function
NAME: XY-1
Success : Created variable succesfully
SIZE: 4
NAME: Y
Success : Created variable succesfully
SIZE: 4
NAME: Z
Success : Created variable succesfully
"Please enter a number?" // This is output for debugging from the lexer
PRINT // This is output for debugging from the parser,
syntax error
Leaving out the actions (which do not change the grammar) your other_print
rule is:
other_print:
TERM_VARIABLE_NAME TERM_INVALID_VARIABLE_NAME TERM_SEPARATOR TERM_STR
That odd sequence of terminals is not present in your input file, so the production won't match, leading to a syntax error report.
It seems unlikely that you actually wanted that syntax; my guess is that you intended the four terminals to be alternatives rather than a concatenation. If so, you need to separate them with |
s.
other_input
has a similar issue.