Search code examples
c yacc lex

Yacc parse won't finish reduction of a production


I am working on building a small interpreter with lex and yacc for a basic programming language that performs addition and multiplication as well as printing lists of ints.

For example the instruction:

Print(2,3,4);

Should output: 2 3 4

and the instruction:

Print(+(2,3));

should output: 5

The first print instruction works perfectly fine. However, for any addition instruction (+ followed by a list), the addition itself is evaluated and the action returns the correct answer (confirmed with printf), but yacc seems to stop before executing the outer print instruction that should run after the addition.

Here is my .l file:

%{
/* Token codes and the yylval union come from the yacc-generated header. */
#include "y.tab.h"
%}

digit [0-9]

%%
{digit}{digit}*     {yylval.str = strdup(yytext); return IntLit; /* one or more digits; text is copied with strdup */}
Print               {return Print;}
\+                  {yylval.str = strdup(yytext); return '+'; /* operator text is copied into yylval.str as well */}
\*                  {yylval.str = strdup(yytext); return '*';}
\(                  {return '(';}
\)                  {return ')';}
\,                  {return ',';}
\;                  {return ';';}
\t                  { /* tab: produces no token */ }
\r                  { /* carriage return: produces no token */ }
\n                  { /* newline: produces no token */ }

%%

    
/* End-of-input hook for the scanner; always returns 1. */
int yywrap(void) {
    return 1;
}

And here is my .y file:

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* Entry points and globals referenced by this file. */
extern int yylex();
extern int yyparse();
extern int yyerror(char *s);
extern char *yytext;

/* Helpers called from the grammar actions; defined after the second %%. */
void doPrint(char *s);
/* NOTE(review): declared as returning int, but the grammar assigns the result
   to a <str> value and the definition returns a char *. */
int evaluate(char *c, char *s);
char* append(char *s, char *s2);
char* makeSingle(char *s);

%}

/* Semantic value type shared by tokens and nonterminals. */
%union {
  char character;   /* not referenced by any %type declaration below */
  char *str;        /* number text, operator text, or a comma-separated list */
}

/* Every typed symbol carries a string. IntLit is a token but is typed here
   with %type and then declared with %token below. */
%type <str> Item
%type <str> IntLit
%type <str> List
%type <str> Func

%token Print
%token IntLit
%%

/* A program is a possibly empty sequence of statements. */
Prog        :   StmtSeq                 { };
StmtSeq     :   Stmt StmtSeq            { };
StmtSeq     :                           { };
/* The only statement: print every element of a list. */
Stmt        : Print '(' List ')' ';'    { doPrint($3); };   
/* A list is kept as one comma-separated string. */
List        : List ',' Item             { $$ = append($1, $3); };
List        : Item                      { $$ = makeSingle($1); };
/* An item is an integer literal or an operator applied to a list. */
Item        : Func '(' List ')'         { $$ = evaluate($1, $3); };
Item        : IntLit                    { $$ = $1;};
/* NOTE(review): these actions read the global yylval rather than $1;
   presumably this relies on no later token having been scanned yet - confirm. */
Func        : '+'                       {$$ = yylval.str; };
Func        : '*'                       {$$ = yylval.str; };

%%

/* Program entry point: runs the parser and returns its result as the exit status. */
int main(int argc, char *argv[]){
    /* Assign yydebug = 1 here to get a parser trace. */
    int status = yyparse();
    return status;
}

/*
 * Prints each comma-separated field of s followed by a space, then a newline.
 * s is split in place with strtok, so the commas in it are overwritten.
 */
void doPrint(char *s){
    char *field;

    for (field = strtok(s, ","); field != NULL; field = strtok(NULL, ",")) {
        printf("%s", field);
        printf(" ");
    }
    printf("\n");
}

/*
 * Folds the comma-separated integers in s with the operator named by c:
 * "+" adds them starting from 0, any other string multiplies them starting
 * from 1. s is split in place with strtok. The numeric result is formatted
 * as decimal text and printed for debugging.
 *
 * NOTE(review): `result` is never pointed at any storage before sprintf
 * writes through it.
 * NOTE(review): the function is declared to return int but returns the
 * char * `result`; the grammar stores the return value in a <str> value.
 */
int evaluate(char *c, char *s){
    char * result;   /* never assigned before the sprintf calls below */
    int res;
    int x;
    char * token;
    int cmp = strcmp(c, "+");
    if(cmp == 0){
        /* Addition: accumulate from 0. */
        token = strtok(s, ",");
        res = 0;
        while(token != NULL){
            x = atoi(token);
            res = res + x;
            token = strtok(NULL, ",");
        }
        sprintf(result,"%d",res);
    } else {
        /* Anything other than "+" is treated as multiplication: accumulate from 1. */
        token = strtok(s, ",");
        res = 1;
        while(token != NULL){
            x = atoi(token);
            res = res * x;
            token = strtok(NULL, ",");
        }
        sprintf(result,"%d",res);
    }
    printf("resultstring is: '%s'\n", result);
    return result;   /* char * returned from a function declared int */
}

/*
 * Appends "," followed by s2 to the string held in s, in place, and returns s.
 * Nothing is allocated or bounds-checked here: the buffer behind s must
 * already have room for strlen(s) + strlen(s2) + 2 bytes.
 */
char* append(char *s, char *s2){
    strcat(s, ",");
    return strcat(s, s2);
}

/*
 * Turns a single item into a one-element list. The same pointer is handed
 * back unchanged; no copy is made.
 */
char* makeSingle(char *s){
    char *list = s;
    return list;
}

/*
 * Error reporter: prints the message s to stdout and returns 1.
 * The message is passed as data to a fixed "%s" format so that any '%'
 * characters inside it are printed literally instead of being interpreted
 * as conversion specifiers.
 */
extern int yyerror(char *s)  {
  printf("%s", s);
  return 1;
}

If I input the instructions:

Print(2,3,4);
Print(+(2,3));

The first print instruction works as expected, but the second instruction stops before printing the result of the addition, but after the evaluation is done.

I'm new to yacc/lex and I'm not sure why yacc is stopping without printing the result of the addition. Shouldn't the result of the addition be an "Item" which can be rewritten as a "List" and then printed properly? Any help or recommendations would be greatly appreciated, thanks!

Edit: After looking more closely at the output with yydebug = 1, I found that the parse ends abruptly, (I believe) before a full reduce is done. The last section of the debug output reads:

Reducing stack by rule 7 (line 40):
    $1 = nterm Func ()
    $2 = token '(' ()
    $3 = nterm List ()
    $4 = token ')' ()

There is no $$ in this reduction as there is in all the other reductions done before


Solution

  • What do you see when you do: doPrint(1); ?
    You have a line in doPrint: char * token = strtok(s, ",");
    When you go through the evaluate function you consume all of the commas, leaving a single number. When you call doPrint, there is no comma in the string so it returns NULL.
    You could have a second if, for when it returns NULL (but the string has contents), or you can rework how you are parsing the string.

    By the way, strdup() returns a malloced string which you don't free.
    You also call strcat() on the strings received from strdup(), so you're writing into memory whose size you don't know. strcat() will let you overflow buffers.
    A better way, when you are appending a string to another and you don't know their sizes is to do something like this:

    /*
     * Returns a newly allocated string holding str1 followed by str2.
     * Both inputs must be heap-allocated; on success they are freed and the
     * caller owns the result. If allocation fails, NULL is returned and the
     * inputs are left untouched so the caller still owns them.
     */
    char *mergestrings(char *str1,char *str2) {
      char *newstr=malloc(strlen(str1)+strlen(str2)+1);
      if (newstr==NULL) {
        return NULL;
      }
      strcpy(newstr,str1);
      strcat(newstr,str2);
      free(str1);
      /* Avoid a double free when the same pointer is passed twice. */
      if (str2!=str1) {
        free(str2);
      }
      return newstr;
    }
    

    or use realloc or something else, but you should pay attention to what you do with memory.

    The big problems were the definition of the evaluate function and usage of pointers.
    scanner.l:

    %{
    /* Token codes and the yylval union come from the yacc-generated header. */
    #include "../obj/y.tab.h"
    %}
    
    digit 0|(([1-9])[0-9]*)
    operator [+*]
    
    %%
    {digit}             {yylval.str = strdup(yytext); return IntLit; /* 0, or digits with no leading zero; text copied with strdup */}
    {operator}          {yylval.character=yytext[0]; return Operator; /* '+' or '*', passed as a single char */}
    "Print"             {return Print;}
    \(                  {return LParen;}
    \)                  {return RParen;}
    \,                  {return Comma;}
    \;                  {return SemiCln;}
    \t                  { /* tab: produces no token */ }
    \r                  { /* carriage return: produces no token */ }
    \n                  { /* newline: produces no token */ }
    
    %%
    

    parser.y:

    %{
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    
    /* Entry points and globals referenced by this file. */
    extern int yylex();
    extern int yyparse();
    extern int yyerror(char *s);
    extern char *yytext;
    
    /* Helpers called from the grammar actions; defined after the second %%. */
    void doPrint(char *s);
    char* evaluate(char c, char *s);
    char* append(char *s, char *s2);
    
    %}
    
    /* Semantic value type shared by tokens and nonterminals. */
    %union {
      char character;   /* operator symbol ('+' or '*') */
      char *str;        /* heap-allocated number text or comma-separated list */
    }
    
    %type <str> Item List
    %type <character> Func
    
    /* Tokens that carry a value name their %union member, so the rules can
       use $1 instead of reading the global yylval, which only works while the
       parser has not yet scanned a lookahead token. */
    %token <str> IntLit
    %token <character> Operator
    %token Print LParen RParen SemiCln Comma
    
    %%
    
    /* A program is one or more statements. */
    Prog       : Stmt                                { }
               | Prog Stmt                           { }
               ;
    /* The only statement: print every element of a list (doPrint frees it). */
    Stmt       : Print LParen List RParen SemiCln    { doPrint($3); }
               ;
    /* A list is one comma-separated string; append consumes both operands. */
    List       : List Comma Item                     { $$ = append($1, $3); }
               | Item                                { $$ = $1; }
               ;
    /* An item is a literal, or an operator folded over a list (evaluate frees the list). */
    Item       : Func LParen List RParen             { $$ = evaluate($1, $3); }
               | IntLit                              { $$ = $1; }
               ;
    Func       : Operator                            { $$ = $1; }
               ;
    
    %%
    
    /* Program entry point: runs the parser and returns its result as the exit status. */
    int main(int argc, char *argv[]){
        int status = yyparse();
        return status;
    }
    
    /*
     * Prints the comma-separated fields of s separated by single spaces and
     * ends the line. The commas in s are overwritten while scanning, and s is
     * freed before returning, so it must be heap-allocated and must not be
     * used by the caller afterwards.
     */
    void doPrint(char *s){
        char *field = s;
        char *comma;

        for (comma = strchr(field, ','); comma != NULL; comma = strchr(field, ',')) {
            *comma = '\0';
            printf("%s ", field);
            field = comma + 1;
        }
        /* The last field has no trailing comma. */
        printf("%s\n", field);
        free(s);
    }
    
    /*
     * Folds the comma-separated integers in s with operator c and returns the
     * result as a newly allocated decimal string.
     *
     *   c  '+' sums the values (starting from 0); any other character
     *      multiplies them (starting from 1).
     *   s  heap-allocated list text; it is modified while scanning and freed
     *      before returning.
     *
     * Returns a malloc'd string the caller must free, or NULL if the
     * allocation fails.
     */
    char *evaluate(char c, char *s){
        enum { RESULT_CAP = 12 };   /* sign + 10 digits + NUL: enough for a 32 bit int */
        const int is_sum = (c == '+');
        int res = is_sum ? 0 : 1;   /* identity element of the chosen operation */
        char *start = s;

        for (;;) {
            char *stop = strchr(start, ',');
            if (stop) {
                *stop = '\0';       /* terminate the current field for atoi */
            }
            if (is_sum) {
                res += atoi(start);
            } else {
                res *= atoi(start);
            }
            if (!stop) {
                break;              /* that was the last field */
            }
            start = stop + 1;
        }
        free(s);

        char *result = malloc(RESULT_CAP);
        if (result) {
            snprintf(result, RESULT_CAP, "%d", res);
        }
        return result;
    }
    
    /*
     * Joins two heap-allocated strings as "s,s2".
     *
     * Takes ownership of both arguments: s is grown with realloc and becomes
     * the result, s2 is freed after being copied. Returns the joined string,
     * which the caller must free. If memory cannot be obtained, both inputs
     * are freed and NULL is returned.
     */
    char* append(char *s, char *s2){
        /* +2: one byte for the comma, one for the terminating NUL. */
        char *result = realloc(s, strlen(s)+strlen(s2)+2);
        if (result == NULL) {
            /* realloc leaves s valid on failure; release it so nothing leaks. */
            free(s);
            free(s2);
            return NULL;
        }
        strcat(result, ",");
        strcat(result, s2);
        free(s2);
        return result;
    }
    
    /*
     * Error reporter: prints the message s to stdout and returns 1.
     * The message is passed as data to a fixed "%s" format so that any '%'
     * characters inside it are printed literally instead of being interpreted
     * as conversion specifiers.
     */
    extern int yyerror(char *s)  {
      printf("%s", s);
      return 1;
    }