Search code examples
windowsbisonfilesizeterminate

Bison parser execution terminates depending on input file size


I created a parser using bison and scanner using flex to parse the setupapi.log file in windows. I tried executing the parser by taking small portions of the file and it worked through. It worked fine for file with about 20 lines. But when I added two more words to the last line of the body, it terminated there giving the error as .exe has stopped working.

How can I get rid of this.

I use %union in my bison file and memory is allocated using malloc from the flex file whenever an identifier is encountered.

My flex file is:

%{  
 #include <stdio.h>
#include "check.tab.h"
#include <string.h>
 #include<malloc.h>
int yyline=1;
 int i;

%}



identifier      [_a-zA-Z\.0-9:=&\\{}()\'\-]+
time            ([0-9]+\:[0-9]+\:[0-9]+\.[0-9]+)

%%

"/"         {
            return(SLASH_TOK);
         }

"["         {
            return(OPEN_TOK);
         }

"]"         {
            return(CLOSE_TOK);
         }

">>>"       {
            return (START_TOK); 
        }

"<<<"       {
            return (END_TOK);
         }

"!!!"       {
            return(ERROR_TOK);
         }

  "!"           {
                return(WARN_TOK);
                }

   "...:"       {
                    return(VEND_INFO);
                }

   "bak:"       {
                    return(BAK_TOK);
                }

   "cci:"       {
                    return(CCI_TOK);
                }   



         //SOME MORE TOKENS ARE HERE....


"\n"        {
            yyline++;
        }

{time}      {
           yylval.sval = malloc(strlen(yytext)+1);
           printf("FRM LEXER TIME= %s %s\n",yytext,yylval.sval);
           strncpy(yylval.sval,yytext,strlen(yytext)+1);

            for(i=0;yylval.sval[i]!='\0';i++)
                 {
                        if(!(i<strlen(yytext)))
                        {
                         yylval.sval[i]='\0';
                        }
                }

                printf("FRM LEXER TIME= %s %s\n",yytext,yylval.sval);
                 return(TIME);
        }


[a-zA-Z_0-9:\\]+(\.)+[\.a-zA-Z_0-9:\\]* {printf("REACHED EXTRA\n");return(DATA);}


[a-zA-Z]+(\:)+[\.a-zA-Z_0-9:\\]*    {yylval.sval = malloc(strlen(yytext)+1);
                strncpy(yylval.sval,yytext,strlen(yytext)+1);
                printf("REACHED EXTRA\n");
                 for(i=0;yylval.sval[i]!='\0';i++)
                 {
                        if(!(i<strlen(yytext)))
                        {
                         yylval.sval[i]='\0';
                        }
                }
                return(DATA);
             }

{identifier}    {

                yylval.sval = malloc(strlen(yytext)+1);
                strncpy(yylval.sval,yytext,strlen(yytext)+1);
                //printf("YYTEXT=%s %d\n",yytext,strlen(yytext));

                for(i=0;yylval.sval[i]!='\0';i++)
                {
                        if(!(i<strlen(yytext)))
                        {
                         yylval.sval[i]='\0';
                        }
                }

                printf("FRM LEXER ID = %s %s\n",yytext,yylval.sval);
                return(IDENTIFIER);
             }

%%

int yywrap(void)
{
return 1;
}

My bison file is:

%{

#include <stdio.h>
#include <string.h>

extern int yyline;
extern char* yytext;
FILE *fp;
%}

%union
{
    char *sval;
};

%token  SLASH_TOK   
 %token      OPEN_TOK
 %token     CLOSE_TOK 
 %token     START_TOK   
    //ALL TOKENS ARE HERE


%type <sval> boot_data description statements word date section_title time_stamp section_body log_entry evt_catgry evt_time


%start session
%%

session : boot_data section
        {
        printf("SESSION\n");
        fprintf(fp,"NO.OF LINES PARSED = %d\n",yyline);
    }
    ;

boot_data : description statements description boot_time
    {
            printf("BOOT DATA\n");
            fprintf(fp,"DESCRIPTION:%s\n",$1);
    }
     ;

description : OPEN_TOK statements CLOSE_TOK
    {
        $$=$2;
        printf("DESCRIPTION\n");
    }
    ;

boot_time: OPEN_TOK statements date TIME CLOSE_TOK
    {
        printf("BOOT TIME\n");
        fprintf(fp,"DATE:%s\n",$3);
        fprintf(fp,"TIME:%s\n",$4);
    }
    ;

date : IDENTIFIER SLASH_TOK IDENTIFIER SLASH_TOK IDENTIFIER
    {
        printf("DATE\n");
        $$=$1;
        strcat($$,"/");
        strcat($$,$3);
        strcat($$,"/");
        strcat($$,$5);
    }
     ;

statements : word
     {
            $$=$1;
            printf("STAETEMENT 1\n");
    }

    |statements word

    {

            $$=$1;
            strcat($$," ");
            strcat($$,$2);
            printf("statements 2\n");
    }
    ;

word : IDENTIFIER
    {
        printf("WORD=%s  %s\n",yytext, $$);

    } 
    |TIME
    {
         printf("TIME=%s  %s\n",yytext, $$);
         fprintf(fp,"%s",$$);

    }
    |DATA
     ;

section : section_head section_body section_tail

     {
            printf("Compilation_start\n");
    }

    ;

section_head : section_title time_stamp
    {
            printf("Compilation_start111\n");
     }
     ;

section_title :START_TOK OPEN_TOK statements CLOSE_TOK
    {
            printf("Compilation_start222\n");
            fprintf(fp,"%s\n",$3);
     }
      ;

time_stamp :START_TOK statements date TIME
    {
            printf("Compilation_start444\n");
            fprintf(fp,"%s Date: %s\n",$2,$3);
            fprintf(fp,"%s Time: %s\n\n",$2,$4);
    }
    ;


section_body :log_entry
        {
        printf("single  entry\n");

       }

      |section_body log_entry
     {
        printf("MULTIPLE ENTRIES\n");            
     }
     ;

log_entry :entry_prefix evt_time evt_catgry statements
    {
                    printf("no time stamp\n");  
                   // fprintf(fp,"====%s====\n",$3);
                    }
    ;




section_tail :section_foot exit_status 
    {
            printf("Compilation_start666\n");
    }
      ;

section_foot:  END_TOK statements date TIME
    {
            printf("Compilation_start777\n");
            fprintf(fp,"\n\n%s Date: %s\n",$2,$3);
            fprintf(fp,"%s Time: %s\n",$2,$4);
    }
    ;

 exit_status : END_TOK OPEN_TOK statements CLOSE_TOK
    {
            printf("Compilation_start888\n");
            fprintf(fp,"\n%s\n",$3);
    }
    ;


%%

yyerror(char *str)
{
printf("Parse error: %s at line %d ,",str,yyline);
yywhere(); 
}

yywhere()
{   
int colon=0;
int i;
if(yytext) 
{ 
    for(i=0;i<50;i++) if(!yytext[i] || yytext[i] == '\n') break;
    if(i)
    {  
        if(!colon)  
        { 
            printf(" near: \"%s\"\n",  yytext);
            colon = 1; 
        }
    }  
    printf("\n");
}  
}   

int main(void) 
{
int i =remove("output.txt");
fp=fopen("output.txt","a+");

yyparse();
fclose(fp);
}

Solution

  • from the strcat man page (should be valid for Windows as well):

    The strcat() function appends the src string to the dest string, overwriting the null byte ('\0') at the end of dest, and then adds a terminating null byte. The strings may not overlap, and the dest string must have enough space for the result.

    Now you're doing:

    statements : word
         {
                $$=$1;
                printf("STAETEMENT 1\n");
        }
    
        |statements word    
        {
    
                $$=$1;
                strcat($$," ");
                strcat($$,$2);
                printf("statements 2\n");
        }
        ;
    

    So if I assume you get an input of two words, the following happens:

    After the first word, statements points to this word, because you do $$ = $1. Then your scanner reads the next word and you concatenate a blank and the next word to the first word. But you didn't take precautions regarding the available memory. You're programming in C which means you'll have to do about everything by yourself.

    You could change the second action to something like:

    statements : statements word
         {
                int lgth1 = strlen($$);
                int lgth2 = strlen($2);
                /* instead of malloc you could also realloc */
                char *tmp = (char *) malloc((lgth1 + lgth2 + 1 + 1) * sizeof(char));
                /* no error handling for clarity */
                sprintf(tmp, "%s %s", $$, $2); /* alternative: strcpy(); strcat(); strcat(); */
                free($$); free($2); /* freeing the memory not used any more */
                $$ = tmp;
        }
    

    This of course isn't bullet proof code and you'll have to fix the other places as well. Hope this helps.