Search code examples
flex-lexerlex

Cannot identify the error yylex() produces


I'm trying to make a .l file for a Pascal-like language. As I'm running it with g++ it crushes after the 20th ish step in the parsing process with different files, one has more definitions in it and one fewer. I tried to get the error but it only send 3 zeros. Did I miss something somewhere?

This is the Utile.h file

#include <map>
#include <iterator>
#include <vector>
#include <iostream>
#include <fstream>
#include <string>
using namespace std;

std::vector<std::string> TS;  


typedef struct {
 int n;
 int elem[20][2];
} FIP;




void addFIP(int code, int posTS, FIP& f){
 f.elem[f.n][0]=code;
 f.elem[f.n++][1]=posTS;
}

FIP fip;
int pozTS=0;

void printFIP(FIP& f){
    ofstream fipFile;
    fipFile.open("FIP.txt");
    cout<<"nr elem fip"<<f.n<<endl;
    for(int i=0;i<f.n;i++)
        fipFile<<f.elem[0]<<" "<<f.elem[1]<<endl;
    fipFile.close();
}

And this is my specs.l file

%{

#include "Utile.h"
%}

%option noyywrap
%option caseless



LETTER      [A-Za-z]
NR_ZEC  [0-9]
NR_NZ    [1-9] 
ZERO        [0]      
ID  {LETTER}({LETTER}|{NR_ZEC})*
NR_BASE10   {NR_NZ}+{NR_ZEC}*|{ZERO}
NR_REAL     {NR_BASE10}"."{NR_BASE10}*
DELIMIT     [;.,:]
SIR_CAR     [\"][^\n]*[\"]
CARACTER    "'"[^\n]"'"
ERR_NR_START [0-9]+[a-zA-Z0-9]*
DOT             "\."
COLON           "\:"
SEMICOLON       "\;"
COMMA           "\,"
PLUS            "\+"

%%

[ \t\n]
[0-9]+[a-zA-Z]+[a-zA-Z0-9]* {printf("Eroare - identificator incepe cu cifra %s \n", yytext);}



read        {addFIP(19,-1,fip);printf("%s\n", yytext);}
write       {addFIP(20,-1,fip);printf("%s\n", yytext);}
then        {addFIP(21,-1,fip);printf("%s\n", yytext);}
variabiles   {addFIP(22,-1,fip);printf("%s\n", yytext);}

"="        {addFIP(200,-1,fip);printf("%s\n", yytext);}
\(         {addFIP(101,-1,fip);printf("%s\n", yytext);}
\)         {addFIP(102,-1,fip);printf("%s\n", yytext);}
\;         {addFIP(103,-1,fip);printf("%s\n", yytext);}   
\,         {addFIP(104,-1,fip);printf("%s\n", yytext);}
\.         {addFIP(105,-1,fip);printf("%s\n", yytext);}
\:         {addFIP(106,-1,fip);printf("%s\n", yytext);}    

"+"     {addFIP(300,-1,fip);printf("%s", yytext);}
\-      {addFIP(301,-1,fip);printf("%s", yytext);}

integer     {addFIP(29,-1,fip);printf("%s", yytext);}
real        {addFIP(30,-1,fip);printf("%s", yytext);}




{ID}    {addFIP(0,pozTS++,fip);printf("%s\n", yytext);}
{NR_BASE10} {
             addFIP(1,pozTS++,fip);
             printf("\n%d\n", 1);
        }
{NR_REAL}   {
            addFIP(1,pozTS++,fip);
            printf("\n%d\n", 1);
        }
"'"[^\n]"'" {
            addFIP(1,pozTS++,fip);
            printf("\n%d\n", 1);
        }
{SIR_CAR}   {addFIP(1,pozTS++,fip);printf("\n%d\n", 1);}



. printf("Error %s\n", yytext);

%%
void yyerror (char const *s) {
   fprintf (stderr, "%s\n", s);
 }



extern FILE *yyin;




main(int argc, char *argv[])
{
    yyin= fopen (argv[1] , "r");
    yylex();
    cout<<yytext;
    fclose(yyin);


}

I choose to print the yytext hoping that it will help me figure out where the problem is , but no luck

Also if it help i run it this way

flex specs.l
g++ lex.yy.c
a.exe test.txt

Solution

  • Your FIP structure only has room for 20 entries and addFIP doesn't check to see if it is full before adding a new one. So after about 20 tokens you will start overwriting random memory.

    Since you are using C++, why don't you just use a std::vector? You can just emplace_back the tokens, and you don't even need to keep track of how many there are since std::vector takes care of all the bookkeeping.

    Having said that, there are very few reasons to create a vector of tokens. Usually you can just process the tokens one at a time.