Search code examples
bisonflex-lexeryacc

Parsing in bison stops at first line


I am creating a bison grammar in a python like language and the output that i am getting when running my testcode file is this:

found identifier a at line 2
memory exhausted

Parsing completed successfully

I am getting some shift reduce errors and reduce reduce but i can create normally my .exe file and if i run it it shows this.

I've tried reducing most of my shift/reduces to no vail. Is that really the problem though? Because i think it wouldn't give me an .exe

.l file

%{
#include <stdio.h>
#include <stdlib.h>
#include "sym_tab.h"
#include "define.h"

FILE *new_file;
int stringtoint;
int current_indent = 0;
void count();
void comment();
int count_indent();

%}



L                  [A-Za-z]
D                  [0-9]
N                  [1-9]
C   "%"|"!"|"@"|"$"|"%"|"^"|"&"|"_"



identifier          {L}({L}|{D})*
dec_const           (-|\+)*(0|{N}{D}*)
blank               [ \v\f]+
invalid_identifier  {D}|{C}(({L}|{D})*|{L})
invalid_keyword     {C}({L}|{D})+
block_count                 ^[\t]+


%%

"while"           {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(WHILE); }

"for"           {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(FOR); }
"in range"            {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(IN_RANGE); }

"input"           {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(INPUT); }
"print"           {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(PRINT); }

"if"              {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(IF); }
"elif"            {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(ELIF); }
"else"            {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(ELSE); }

"and"             {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(AND); }
"not"             {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(NOT); }
"or"              {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(OR); }

"return"          {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(RETURN); }
"exit"            {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(EXIT); }

"def"             {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
                        addsym( yycopy, block_num ); return(DEF); }

L?\"(\\.|[^\\"])*\" {char *yycopy=strdup(yytext); count(); printf("found literal string %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(STRING_LITERAL); }


{dec_const}               {char *yycopy=strdup(yytext); count(); stringtoint=atoi(yycopy);if(stringtoint<(-32768)|| stringtoint>32767){
              printf("dec_const %d in line %d not an acceptable value\n",stringtoint,line);}else{
              printf("found dec_constant %s at line %d\n", yycopy,line);
              addsym( yycopy, block_num ); return(DEC_CONST);}}


{identifier}              {char *yycopy=strdup(yytext); count(); if(strlen(yycopy)>20){
               printf("identifier %s in line %d not valid(longer than 20 characters)\n",yycopy,line);}
               else{printf("found identifier %s at line %d\n", yycopy,line);
                           addsym( yycopy, block_num ); return(IDENTIFIER);}}


"+"                       {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(PLUS);}
"-"                       {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(MINUS);}
"*"                       {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(STAR);}
"/"                       {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(DIV);}
"<"                       {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(L_THAN);}
">"                       {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(G_THAN);}
"=="                      {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(EQUAL);}
"<="                      {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(L_EQ_THAN);}
">="                      {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(G_EQ_THAN);}
"<>"                      {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(NEQUAL);}
":="                        {char *yycopy=strdup(yytext); count(); printf("found asign_symbol %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(ASSIGN);}
"("                       {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(LPAREN);}
")"                       {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(RPAREN);}

"["                       {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(LSQUARE_BRACK);}
"]"                       {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(RSQUARE_BRACK);}

"\n"                        {char *yycopy=strdup(yytext); count(); printf("found new line at line %d\n" , line);
                           addsym( yycopy, block_num ); return(END_LINE);}
","                       {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(COMMA);}
":"                                             {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
                           addsym( yycopy, block_num ); return(COLON);}

"#"                      { comment();}

{blank}                   { count();}



{invalid_keyword}    {char *yycopy=strdup(yytext); count(); printf("invalid keyword %s at line %d\n", yycopy, line);
              addsym( yycopy, block_num );}

{invalid_identifier}   {char *yycopy=strdup(yytext); count(); printf("invalid identifier %s at line %d\n", yycopy, line);
              addsym( yycopy, block_num );}


.              {char *yycopy=strdup(yytext); count(); printf("unexpected character %s at line %d\n", yycopy, line);
              addsym( yycopy, block_num );}

%%

int yywrap()
{
  return 1;
}

// void main(int argc, char *argv[]){
//   int ret_val=1;
//
//   if (argc!=2) printf("\nUsage: lexyy <input file name> \n");
//   else
//     if ((new_file=fopen(argv[1],"r"))==NULL)
//       printf("\n<%s> not found.\n",argv[1]);
//     else{
//       yyrestart(new_file);
//       while(ret_val!=0){
//         ret_val=yylex();
//       }
//       fclose(new_file);
//     }
//}

void count()
{
        int i;
        for(i=0;yytext[i]!='\0';i++)
        if(yytext[i]=='\n')
        {
                line++;
        }
}

int count_indent()
{
  int i;
  int tab_num = 0;
  for(i=0;yytext[i]=='\t';i++)
  {
    tab_num++;
  }
  return tab_num;
}


void comment()
{
  int c;
  while(c=input()!='\n' && c!=EOF)
  {

  }
  line++;
}

.y file

%{
    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include <ctype.h>
    #include "y.tab.h"
    extern int yylex();
    extern FILE *yyin;
%}


%token L_THAN G_THAN EQUAL L_EQ_THAN G_EQ_THAN NEQUAL
%token ASSIGN
%token LPAREN RPAREN LSQUARE_BRACK RSQUARE_BRACK
%token END_LINE COMMA COLON
%token INDENT DEDENT


%start Program
%%

Program: Block Program
    | Empty
    ;

Empty: /* empty */
    ;

Block: Declarations
    | Subprograms
    | Sequence
    ;



%%
extern int column;

int main(int argc, char *argv[])
{
    yyin = fopen("test_code.sy", "r");
    if(yyparse()==1)
        printf("\nParsing failed\n\n");
       else
        printf("\nParsing completed successfully\n");
       fclose(yyin);
    return 0;
}

int yyerror(s)
char *s;
{
                printf("%s\n", s);
        fflush(stdout);
        return 1;
}

**EDIT:**sym_tab.h file

/*#include <iostream.h>*/
#include <stdio.h>
#include <malloc.h>
#include <string.h>

#define table_size          100

extern int line=1;
extern int end_file=1;
extern int block_num=0;
extern FILE *new_file; 

typedef struct hash_sym
    {
    struct hash_sym *prev, *next;
    char *nam;
    char *str_val;
    char *id_type;
    int  id_value;
    int  block_num;
} Hashing_table; 


Hashing_table *table[table_size];

int hash_funct( char str[], int hash_size);

addsym( sym, bloc_num )
register char sym[];
int bloc_num;

{   
    int hash_val = hash_funct( sym, table_size );
    register struct hash_sym *sym_sym = table[hash_val];
    register struct hash_sym *new_sym;
    register struct hash_sym *successor;

    while ( sym_sym!=0 )
    {

        if (  strcmp( sym, table[hash_val]->nam )==0 )
        {   

            printf("the entry %s at line %d already exists at symbol table\n", sym,line);   
            return -1;
        }   

        sym_sym = sym_sym->next;
    }


    new_sym = (struct hash_sym *)
    malloc( sizeof( struct hash_sym ) );


    if ( (successor = table[hash_val]) )
    { 

        new_sym->next = successor;
        successor->prev = new_sym;
    }
    else
        new_sym->next = NULL;

    new_sym->prev = NULL;
    new_sym->nam = sym;
    new_sym->block_num = bloc_num;

    table[hash_val] = new_sym;
    return 0;
}




int hash_funct( str, hash_size )
register char str[];
int hash_size;
{
    register int hashval;
    register int i;

    hashval = 0;
    i = 0;

    while ( str[i]!='\0' )
        {
        hashval = hashval +  str[i++]*(16+i);
        /*hashval %= hash_size;*/
        }
    return (hashval %= hash_size);

}


I expected the parser to parse until the end of the file. I haven't setup the prints yet in my .y file so i wouldn't expect a print in there.


Solution

  • The basic problem is that your grammar includes an arbitrary repetition of a non-terminal which can match the empty string.

    That's always ambiguous, because it is impossible to distinguish between an empty string and two consecutive empty strings, or indeed a million consecutive empty strings. So repetition of a nullable non-terminal will always produce a conflict. Most of the time, the generated parser is just incorrect, but it still terminates. Bison resolves shift/reduce conflicts by choosing to shift, which guarantees that the parser will progress through the input. In effect, it resolves the question "how many empty strings are there" with the smallest possible answer, typically "one".

    But in your case, the repetition has more than one alternative, several of which are nullable. Now the parser has an even harder problem: it has to figure out which non-terminal the empty string should match. That's a reduce/reduce conflict, and bison's resolution is to always pick the non-terminal which comes first in the grammar. That's going to be a problem if the correct choice for a particular input is some other non-terminal.

    Here's a minimal example of what I'm talking about:

    %%
    list: %empty | unit list
    unit: as | bs
    as: %empty | as 'a'
    bs: %empty | bs 'b'
    

    Here, a unit can be zero or more as, or zero or more bs. Since zero as and zero bs look identical, the parser really can't tell which one to choose from the grammar, so it always picks zero as (because that comes first in the grammar). The problem comes when the input contains a b. Since the parser never uses the rule bs: %empty (in fact, bison warns you about that), it can never apply the rule bs: bs 'b'. So faced with a b, the parser reduces an empty as, makes that into a unit, adds it to the list and then tries to parse another unit. However, nothing has changed; no token has been read so the lookahead is still a b. And so the parser will go into an endless loop parsing empty units containing an empty as over and over again.

    With the list production written as above (right recursively), these empty units need to be added to the parser stack. So the parser will eventually run out of space for its stack, and die with a "memory exhausted" error. If you change it to left recursion (list: %empty | list unit), then the parser doesn't need to use stack space, and it can go on parsing empty units forever.

    I suggest that you try the above simple example using bison's very useful trace feature (see "Debugging your parser" in the Bison Manual). That's a lot easier than filling your grammar file with printf calls, and it is also a lot more informative.

    To fix the problem, you just need to require unit to be non-empty, which avoids the "repetition of an empty string' problem. If every unit needs to match something, then the grammar matches the same language but it does so unambiguously.