Search code examples
bisonflex-lexeryacclexlexical-analysis

Syntax error in bison when reading token


I have a file that will be read in by my parser:

test.txt

BEGINING.

XXX XY-1.

XXXX Y.

XXXX Z.

BODY.

PRINT "Please enter a number?".

END.

The error occurs around the PRINT token. In my lexical analyser, the string "Please enter a number?" is processed correctly; I know this because I printed it to the console.

lexer.I

%{
#include <stdio.h>
#include <stdlib.h> /* atoi */
#include <string.h> /* strdup */

/* Generated by `bison -d`: supplies the TERM_* token codes and declares
   yylval with the %union type (members `integer` and `string`). YYSTYPE is
   deliberately not redefined here, because the rules below assign through
   the union members. */
#include "parser.tab.h"
%}

%option noyywrap
%option caseless

%%

(?i:BEGINING) return TERM_BEGINING;
(?i:BODY) return TERM_BODY;
(?i:END) return TERM_END;
(?i:MOVE) return TERM_MOVE;
(?i:TO) return TERM_TO;
(?i:ADD) return TERM_ADD;
(?i:INPUT) return TERM_INPUT;
(?i:PRINT) return TERM_PRINT;


[ \t]+ // ignore whitespaces (a '|' inside [...] is a literal character, so it is not listed)

\n // Ignore new line

[X]+ yylval.integer = yyleng; return TERM_SIZE;

[\"][^"]*[\"] yylval.string = strdup(yytext); printf("%s\n", yytext); return TERM_STR;

";" return TERM_SEPARATOR;

"." return TERM_FULLSTOP;

[0-9]+ yylval.integer = atoi(yytext); return TERM_INT;

[\_\-0-9][a-zA-Z][a-zA-Z0-9\-\_]* yylval.string = strdup(yytext); return TERM_INVALID_VARIABLE_NAME;

[A-Z][A-Z0-9\-]* yylval.string = strdup(yytext); return TERM_VARIABLE_NAME;

. return TERM_INVALID_TOKEN;

%%

//int main(int argc, char** argv) {
//  if (argc != 2) {
//      yyin = stdin;
//      yylex();
//  } else {
//      FILE* aFile = fopen(argv[1], "r");
//      if (!aFile) {
//          printf("Cannot open file!\n");
//          return -1;
//      } else {
//          yyin = aFile;
//          yylex();
//          fclose(yyin);
//      }
//  }
//}

Now in the parser, for debugging purposes, when the PRINT keyword is returned from the lexical analyser I have decided to print out the word PRINT.

parser.y

%{
//yylval - externalise variable in which yylex should place semantic value associated with a token
//yyparse - parser function produced by bison, call this to start parsing

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>

extern FILE* yyin;
typedef int bool;

#define true 1
#define false 0

// Error callback for the generated parser: prints the message `s` on stderr
// and terminates the program. The exit status is EXIT_FAILURE so that a
// syntax error is distinguishable from a successful parse.
int yyerror(char *s) {
  fprintf(stderr, "%s\n", s);
  exit(EXIT_FAILURE);
}

//Declare yylex, user supplied lexical analyzer function to get the next token
int yylex(void);

// End-of-input hook: when the scanner receives EOF it consults yywrap().
// Always answering 1 reports that no further input follows.
int yywrap(void) {
    enum { NO_MORE_INPUT = 1 };
    return NO_MORE_INPUT;
}

// One declared variable, as recorded by createVar().
struct variable {
    char* varName;  // name pointer handed to createVar(); stored as-is, not copied
    int varSize;    // size handed to createVar()
    int varValue;   // set to 0 when the variable is created
};

// Growable array of struct variable, managed through initialiseArray(),
// insertArray() and freeArray().
typedef struct {
    struct variable* array;  // heap buffer holding the elements
    size_t used;             // number of elements currently stored
    size_t size;             // number of elements the buffer has room for
} Array;

// Method declarations

// Console reporting helpers: each prints its message with a
// "Warning : ", "Error : " or "Success : " prefix respectively.
void writeToWarning(char *aWarning);
void writeError(char *anError);
void writeToLog(char *alog);
// Returns non-zero when a variable named varName is already stored in the global array.
bool checkIfDeclared(char* varName);

// Storage management for the growable Array of variables.
void initialiseArray(Array *a, size_t initialSize);
void insertArray(Array *a, struct variable *aVar);
void freeArray(Array *a);

// Helpers called from the grammar actions.
void createVar(int size, char* varName);
void inputValues(char* varName);


%}

// Terminal symbols: every token code the lexer can return.
%token TERM_BEGINING TERM_BODY TERM_END TERM_MOVE 
        TERM_TO TERM_ADD TERM_INPUT TERM_PRINT  
        TERM_SIZE TERM_STR TERM_SEPARATOR TERM_FULLSTOP 
        TERM_INT TERM_INVALID_VARIABLE_NAME TERM_VARIABLE_NAME TERM_INVALID_TOKEN

// Semantic value type: this union is the type of yylval, which the lexer
// (lexer.l) fills in before returning a token that carries a value.
%union {
    int integer;   // numeric values
    char* string;  // text values (the lexer stores strdup'd copies of yytext)
}

// Which union member holds the value of each value-carrying token.
// NOTE(review): TERM_SEPARATOR and TERM_FULLSTOP are typed <string>, but the
// lexer rules for ";" and "." shown above return without assigning yylval, so
// reading their $n value in an action would see whatever yylval held before.
%type<integer> TERM_INT
%type<integer> TERM_SIZE
%type<string> TERM_STR
%type<string> TERM_SEPARATOR
%type<string> TERM_FULLSTOP
%type<string> TERM_VARIABLE_NAME
%type<string> TERM_INVALID_VARIABLE_NAME

%%

// Grammar

// A program is either empty or has the shape
//   BEGINING. <declarations> BODY. <statements> END.
// Reducing the full form prints a completion message and exits the process.
program:
    /* empty */ | 
    begin middle_declarations body grammar_s end {
        printf("Parsing complete\n");
        exit(0);
    };

// Section markers: each keyword must be followed by a full stop.
begin:
    TERM_BEGINING TERM_FULLSTOP;

body:
    TERM_BODY TERM_FULLSTOP;

end:
    TERM_END TERM_FULLSTOP;

// Zero or more declarations, each terminated by a full stop.
middle_declarations:
    /* empty */ |
    // Left recursive to allow for many declarations
    middle_declarations declaration TERM_FULLSTOP;

// A declaration is a size token followed by a variable name; the action
// registers the variable through createVar().
declaration:
    TERM_SIZE TERM_VARIABLE_NAME {
        createVar($1, $2);
    };

// Zero or more statements, each terminated by a full stop.
grammar_s:
    /* empty */ |
    grammar_s grammar TERM_FULLSTOP;

// A single statement.
grammar:
    add | move | print | input;

// ADD statement: adds either a variable or an integer literal to a variable.
// The two forms are alternatives and are therefore separated by `|`; without
// it bison reads both lines as one eight-token production in which the first
// brace block is only a mid-rule action.
add:
    TERM_ADD TERM_VARIABLE_NAME TERM_TO TERM_VARIABLE_NAME {
        //addVarToVar($2, $4);
    }
    |
    TERM_ADD TERM_INT TERM_TO TERM_VARIABLE_NAME {
        //addNumToVar($2, $4);
    }
    ;

// MOVE statement: moves either a variable or an integer literal into a
// variable. The two forms are alternatives and are therefore separated by
// `|`; without it bison reads both lines as one eight-token production in
// which the first brace block is only a mid-rule action.
move:
    TERM_MOVE TERM_VARIABLE_NAME TERM_TO TERM_VARIABLE_NAME {
        //moveVarToVar($2, $4);
    }
    |
    TERM_MOVE TERM_INT TERM_TO TERM_VARIABLE_NAME {
        //moveNumToVar($2, $4);
    }
    ;

// PRINT statement: the PRINT keyword followed by zero or more items.
// The action is a debugging aid that echoes "PRINT" when the rule is reduced.
print:
    TERM_PRINT rest_of_print {
        printf("PRINT\n");
    };

// Zero or more other_print items (left recursive).
rest_of_print:
    /* empty */ |
    rest_of_print other_print;

// NOTE(review): there is no `|` between the four pieces below, so bison reads
// them as ONE production:
//   TERM_VARIABLE_NAME TERM_INVALID_VARIABLE_NAME TERM_SEPARATOR TERM_STR
// with the first three brace blocks acting as mid-rule actions. A lone
// TERM_STR therefore does not match this rule. If the four terminals are
// meant to be alternatives, they need to be separated with `|`.
other_print:
    TERM_VARIABLE_NAME {
        //printVarName($1);
    }

    TERM_INVALID_VARIABLE_NAME {
        //printInvalidVarName($1);
    }

    TERM_SEPARATOR {
        printf("%s", $1);
    }

    TERM_STR {
        printf("STRING%s%s", $1, "\n");
    }

    ;

// INPUT statement: the INPUT keyword followed by other_input.
input:
    // The terminating full stop is consumed by the grammar_s rule
    TERM_INPUT other_input;

// NOTE(review): apart from the empty alternative, there is no `|` between the
// two pieces below, so bison reads them as ONE production:
//   TERM_VARIABLE_NAME TERM_VARIABLE_NAME TERM_SEPARATOR
// with the first brace block acting as a mid-rule action. If they are meant
// to be alternatives, they need to be separated with `|`.
other_input:

    /* empty */ |
    // Input var1
    TERM_VARIABLE_NAME {
        inputValues($1);
    }

    // Can be input var1
    TERM_VARIABLE_NAME TERM_SEPARATOR {
        //inputValues($2);
    }

    ;

%%

Array a;

// Prepares `a` to hold `initialSize` elements and marks it as empty.
// Running out of memory is treated as fatal: a message is written to stderr
// and the program exits, because insertArray() writes into a->array without
// any further check.
void initialiseArray(Array *a, size_t initialSize) {
    a->array = malloc(initialSize * sizeof *a->array);
    // malloc(0) may legitimately return NULL, so only a non-empty request
    // that comes back NULL counts as a failure.
    if (a->array == NULL && initialSize != 0) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    a->used = 0;
    a->size = initialSize;
}

// Appends a copy of *aVar to `a`, enlarging the buffer first when it is full.
// Running out of memory is treated as fatal: a message is written to stderr
// and the program exits.
void insertArray(Array *a, struct variable *aVar) {
    if (a->used == a->size) {
        // Keep the existing growth factor of 6. A zero-capacity array would
        // stay at zero when multiplied, so it gets a single slot instead.
        size_t grown = a->size ? a->size * 6 : 1;
        // realloc into a temporary: on failure the old buffer stays valid and
        // the pointer to it is not lost.
        struct variable *bigger = realloc(a->array, grown * sizeof *bigger);
        if (bigger == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(EXIT_FAILURE);
        }
        a->array = bigger;
        a->size = grown;
    }
    a->array[a->used++] = *aVar;
}

// Releases the element buffer of `a` and resets it to the empty state
// (no buffer, zero capacity, zero elements).
void freeArray(Array *a) {
    free(a->array);
    a->size = 0;
    a->used = 0;
    a->array = NULL;
}

void createVar(int size, char* varName) {

    printf("SIZE: %d\n", size);
    printf("NAME: %s\n", varName);

    bool alreadyDeclared;
    alreadyDeclared = checkIfDeclared(varName);
    if (!alreadyDeclared) {
        struct variable aVar;
        aVar.varName = varName;
        aVar.varSize = size;
        aVar.varValue = 0;

        insertArray(&a, &aVar);

        writeToLog("Created variable succesfully\n");
    } else {
        writeToWarning("Variable has already been declared\n");
    }
}

// Looks for `varName` among the variables stored in the global array.
// Returns 1 when an entry with exactly that name exists, 0 otherwise.
bool checkIfDeclared(char* varName) {
    // a.used is a size_t, so the index is a size_t too; this avoids a
    // signed/unsigned comparison.
    for (size_t i = 0; i < a.used; i++) {
        if (strcmp(a.array[i].varName, varName) == 0) {
            return 1;
        }
    }
    return 0;
}

// Echoes the name of the variable named in an INPUT statement.
void inputValues(char* varName) {
    fprintf(stdout, "VAR_NAME: %s\n", varName);
}

// Entry point. With exactly one argument the named file is parsed; otherwise
// the program is read from stdin. Both paths initialise the variable array
// and run the parser (yyparse), not just the scanner.
//
// Returns -1 when the file cannot be opened, 0 when yyparse() reports success
// and EXIT_FAILURE otherwise. Note that yyparse() may not return at all: the
// `program` rule and yyerror() both call exit().
int main(int argc, char* argv[]) {
    FILE* input = stdin;

    if (argc == 2) {
        input = fopen(argv[1], "r");
        if (!input) {
            printf("Cannot open file!\n");
            return -1;
        }
    }

    initialiseArray(&a, 5);

    yyin = input;
    int status = yyparse();

    // Only close what was opened here; stdin is left alone.
    if (input != stdin) {
        fclose(input);
    }
    freeArray(&a);

    return status == 0 ? 0 : EXIT_FAILURE;
}

// Prints `aWarning` on stdout, prefixed with "Warning : " and followed by a newline.
void writeToWarning(char *aWarning){
  const char *lineEnd = "\n";
  printf("Warning : %s%s", aWarning, lineEnd);
}

// Prints `anError` on stdout, prefixed with "Error : " and followed by a newline.
void writeError(char *anError){
  const char *lineEnd = "\n";
  printf("Error : %s%s", anError, lineEnd);
}

// Prints `alog` on stdout, prefixed with "Success : " and followed by a newline.
void writeToLog(char *alog){
  const char *lineEnd = "\n";
  printf("Success : %s%s", alog, lineEnd);
}

This is how I compile

bison -d parser.y -o parser.tab.c
flex -o lexer.yy.c lexer.I
gcc lexer.yy.c parser.tab.c -o parser

This is how I run

./parser test.txt

Then this is the output that I get

SIZE: 3 // this is output from the createVar function
NAME: XY-1
Success : Created variable succesfully

SIZE: 4
NAME: Y
Success : Created variable succesfully

SIZE: 4
NAME: Z
Success : Created variable succesfully

"Please enter a number?" // This is output for debugging from the lexer
PRINT // This is output for debugging from the parser,
syntax error

Solution

  • Leaving out the actions (which do not change the grammar) your other_print rule is:

    other_print:
        TERM_VARIABLE_NAME TERM_INVALID_VARIABLE_NAME TERM_SEPARATOR TERM_STR
    

    That odd sequence of terminals is not present in your input file, so the production won't match, leading to a syntax error report.

    It seems unlikely that you actually wanted that syntax; my guess is that you intended the four terminals to be alternatives rather than a concatenation. If so, you need to separate them with |s.

    other_input has a similar issue.