Search code examples
parsingbisonflex-lexeryacclex

Lex Yacc syntax error after adding semantic actions


I'm making a parser with Lex&Yacc for a school project, and I have some unexplained issues with my syntax analysis.

First of all, this is my yacc file that doesn't work.

%{

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern FILE *yyin;
extern char* yytext;

%}

%token <code> IDENTIFIER CONSTANT SIZEOF
%token <code> PTR_OP LE_OP GE_OP LES_OP GES_OP EQ_OP NE_OP
%token <code> AND_OP OR_OP
%token <code> EXTERN
%token <code> INT VOID
%token <code> STRUCT 
%token <code> IF ELSE WHILE FOR RETURN

%union
{
    int number;
    char* code;
}

%start program
%type <code> primary_expression postfix_expression argument_expression_list unary_expression
%type <code> unary_operator multiplicative_expression additive_expression relational_expression
%type <code> equality_expression logical_and_expression logical_or_expression expression
%type <code> declaration declaration_specifiers type_specifier struct_specifier
%type <code> struct_declaration_list struct_declaration declarator direct_declarator
%type <code> parameter_list parameter_declaration statement compound_statement
%type <code> declaration_list statement_list expression_statement selection_statement
%type <code> iteration_statement jump_statement program external_declaration
%type <code> function_definition
%%

primary_expression
        : IDENTIFIER    {strcpy($$,$1);} 
        | CONSTANT {strcpy($$,$1);}
        | '(' expression ')'    {
                    char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
                    sprintf(temp,"(%s)",$2);
                    strcpy($$,temp);
                    free(temp);
                    }
    | SIZEOF '(' type_specifier ')'     {
                            char* temp = (char*)malloc((8 + strlen($3))*sizeof(char));
                            sprintf(temp,"sizeof(%s)",$3);
                            strcpy($$,temp);
                            free(temp);
                            }
    | SIZEOF '(' IDENTIFIER ')'     {
                            char* temp = (char*)malloc((8 + strlen($3))*sizeof(char));
                            sprintf(temp,"sizeof(%s)",$3);
                            strcpy($$,temp);
                            free(temp);
                            }
        ;

postfix_expression
        : primary_expression                        {strcpy($$,$1);}
        | postfix_expression '(' ')'                        {
                            char* temp = (char*)malloc((2 + strlen($1))*sizeof(char));
                            sprintf(temp,"%s()",$1);
                            strcpy($$,temp);
                            free(temp);
                                            }
        | postfix_expression '(' argument_expression_list ')'   {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s(%s)",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        | postfix_expression PTR_OP IDENTIFIER              {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s->%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;

argument_expression_list
        : expression                                {strcpy($$,$1);}
        | argument_expression_list ',' expression           {
                            char* temp = (char*)malloc((1 + strlen($1)+strlen($1))*sizeof(char));
                            sprintf(temp,"%s,%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;               

unary_expression
        : postfix_expression                            {strcpy($$,$1);}
        | unary_operator unary_expression               {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s%s",$1,$2);
                            strcpy($$,temp);
                            free(temp);}
        ;

unary_operator
        : '&'                                       {strcpy($$,"&");}
        | '*'                                       {strcpy($$,"*");}
        | '-'                                       {strcpy($$,"-");}
        ;

multiplicative_expression
        : unary_expression                          {strcpy($$,$1);}
        | multiplicative_expression '*' unary_expression    {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s*%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        | multiplicative_expression '/' unary_expression        {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s/%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;

additive_expression
        : multiplicative_expression                 {strcpy($$,$1);}
        | additive_expression '+' multiplicative_expression {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s+%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        | additive_expression '-' multiplicative_expression {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s-%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;

relational_expression
        : additive_expression                           {strcpy($$,$1);}
        | relational_expression LES_OP additive_expression      {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s<=%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        | relational_expression GES_OP additive_expression      {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s>=%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);         
                                                }
        | relational_expression LE_OP additive_expression       {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s<%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        | relational_expression GE_OP additive_expression       {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s>%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

equality_expression
        : relational_expression                         {strcpy($$,$1);}
        | equality_expression EQ_OP relational_expression       {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s==%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                            }
        | equality_expression NE_OP relational_expression       {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s!=%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

logical_and_expression
        : equality_expression                           {strcpy($$,$1);}
        | logical_and_expression AND_OP equality_expression {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s&&%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

logical_or_expression
        : logical_and_expression                            {strcpy($$,$1);}
        | logical_or_expression OR_OP logical_and_expression    {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s||%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

expression
        : logical_or_expression                         {strcpy($$,$1);}
        | unary_expression '=' expression                   {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s=%s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

declaration
        : declaration_specifiers declarator ';'     {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s%s;",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                }
        | struct_specifier ';'              {
                            char* temp = (char*)malloc((1 + strlen($1))*sizeof(char));
                            sprintf(temp,"%s;",$1);
                            strcpy($$,temp);
                            free(temp);
                                }
        ;

declaration_specifiers
        : EXTERN type_specifier             {
                            char* temp = (char*)malloc((strlen($1)+ strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                    }
        | type_specifier                    {strcpy($$,$1);}
        ;

type_specifier
        : VOID                          {strcpy($$,$1);}
        | INT                           {strcpy($$,$1);}
        | struct_specifier                  {strcpy($$,$1);}
        ;

struct_specifier
        : STRUCT IDENTIFIER '{' struct_declaration_list '}'     {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($2) + strlen($4))*sizeof(char));
                            sprintf(temp,"%s %s {%s}",$1,$2,$4);
                            strcpy($$,temp);
                            free(temp);
                                                }
        | STRUCT '{' struct_declaration_list '}'                {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s {%s}",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        | STRUCT IDENTIFIER                         {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

struct_declaration_list
        : struct_declaration                                {strcpy($$,$1);}
        | struct_declaration_list struct_declaration            {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

struct_declaration
        : type_specifier declarator ';'         {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s;",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                    }
        ;

declarator
        : '*' direct_declarator                         {
                            char* temp = (char*)malloc((1 + strlen($2))*sizeof(char));
                            sprintf(temp,"*%s",$2);
                            strcpy($$,temp);
                            free(temp);
                                                }
        | direct_declarator                             {strcpy($$,$1);}
        ;

direct_declarator
        : IDENTIFIER                                    {strcpy($$,$1);}
        | '(' declarator ')'                                {
                            char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
                            sprintf(temp,"(%s)",$2);
                            strcpy($$,temp);
                            free(temp);
                                                }
        | direct_declarator '(' parameter_list ')'              {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s(%s)",$1,$3);
                            strcpy($$,temp);
                            free(temp);         
                                                }
        | direct_declarator '(' ')'                         {
                            char* temp = (char*)malloc((2 + strlen($1))*sizeof(char));
                            sprintf(temp,"%s()",$1);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

parameter_list
        : parameter_declaration                         {strcpy($$,$1);}
        | parameter_list ',' parameter_declaration              {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s, %s",$1,$3);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

parameter_declaration
        : declaration_specifiers declarator                 {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                                }
        ;

statement
        : compound_statement                            {strcpy($$,$1);}
        | expression_statement                          {strcpy($$,$1);}
        | selection_statement                           {strcpy($$,$1);}
        | iteration_statement                           {strcpy($$,$1);}
        | jump_statement                                {strcpy($$,$1);}
        ;

compound_statement
        : '{' '}'                                   {strcpy($$,"{}");}
        | '{' statement_list '}'                        {
                            char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
                            sprintf(temp,"{%s}",$2);
                            strcpy($$,temp);
                            free(temp);
                                            }
        | '{' declaration_list '}'                      {
                            char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
                            sprintf(temp,"{%s}",$2);
                            strcpy($$,temp);
                            free(temp);
                                            }
        | '{' declaration_list statement_list '}'               {

                            char* temp = (char*)malloc((2 + strlen($2) + strlen($3))*sizeof(char));
                            sprintf(temp,"{%s%s}",$2,$3);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;

declaration_list
        : declaration                               {strcpy($$,$1);}
        | declaration_list declaration                  {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;

statement_list
        : statement                             {strcpy($$,$1);}
        | statement_list statement                  {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                            }
        ;

expression_statement
        : ';'               {strcpy($$,";");}
        | expression ';'        {
                            char* temp = (char*)malloc((1 + strlen($1))*sizeof(char));
                            sprintf(temp,"%s;",$1);
                            strcpy($$,temp);
                            free(temp);
                    }
        ;

selection_statement
        : IF '(' expression ')' statement                           {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3) + strlen($5))*sizeof(char));
                            sprintf(temp,"%s (%s) %s",$1,$3,$5);
                            strcpy($$,temp);
                            free(temp);
                                                    }
        | IF '(' expression ')' statement ELSE statement                {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3) + strlen($5) + strlen($6) + strlen($7))*sizeof(char));
                            sprintf(temp,"%s (%s) %s %s %s",$1,$3,$5,$6,$7);
                            strcpy($$,temp);
                            free(temp);
                                                    }
        ;

iteration_statement
        : WHILE '(' expression ')' statement                                    {
                            char* temp = (char*)malloc((1 + strlen($1) + strlen($3) + strlen($5))*sizeof(char));
                            sprintf(temp,"%s (%s) %s",$1,$3,$5);
                            strcpy($$,temp);
                            free(temp);
                                                                }
        | FOR '(' expression_statement expression_statement expression ')' statement    {
                            char* temp = (char*)malloc((2 + strlen($1) + strlen($3) + strlen($4) + strlen($5) + strlen($7))*sizeof(char));
                            sprintf(temp,"%s (%s %s %s) %s",$1,$3,$4,$5,$7);
                            strcpy($$,temp);
                            free(temp);
                                                                }
    ;

jump_statement
        : RETURN ';'                {strcpy($$,$1);}
    | RETURN expression ';'     {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s;",$1,$2);
                            strcpy($$,temp);
                            }
        ;

program
        : external_declaration              {strcpy($$,$1);}
        | program external_declaration          {
                            char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
                            sprintf(temp,"%s %s",$1,$2);
                            strcpy($$,temp);
                            free(temp);
                                    }
        ;

external_declaration
        : function_definition                   {strcpy($$,$1);}
        | declaration                       {strcpy($$,$1);}
        ;

function_definition
        : declaration_specifiers declarator compound_statement      {

                            char* temp = (char*)malloc((strlen($1) + strlen($2) + strlen($3))*sizeof(char));
                            sprintf(temp,"%s %s %s",$1,$2,$3);
                            strcpy($$,temp);
                            free(temp);
                                                    }
        ;

%%


int yyerror(char *s) {
  printf("%s\n",s);
  printf("%s",yytext);
}

int main(int argc, char *argv[]) {
  yyin = fopen(argv[1],"r");
  yyparse();
  return 0;
}

The final purpose of this parser is to generate a 3 address code with an initial code (near to an old C language).

For now, I'm just creating a simple parser to test if the input file is correctly written in my first language.

In my actual code, semantic actions are here to build the same code as the input, to then write it in an external file.

It may seems completely idiot for you but it's my first compiler project and I think it's a crucial step for me if I want to generate code by the future (since I just want a code translation).

So this is the issue. Before the implementation of my semantic actions, the parser was perfectly parsing test files, no syntax error. But now, it doesn't work anymore.

Are we ok that semantic actions don't influence the syntax parse ?

PS: There is my lex file :

chiffre                       [0-9]
lettre            [a-zA-Z]
exposant                       [Ee][+-]?{D}+
commentaire    [\/][\*][^\/\*]*[\*][\/]

identificateur   {lettre}({lettre}|_|{chiffre})*
entier       {chiffre}+
pointeur        [-][>]

%{
#include <stdio.h>
#include "y.tab.h"


/* Ne gère pas les commentaires. A rajouter */
/* Supprimer les lignes dont vous n'avez pas besoin. */

%}

%%

"else"                  {yylval.code=yytext;return ELSE;}
"extern"                {yylval.code=yytext;return EXTERN;}
"for"                   {yylval.code=yytext;return FOR;}
"if"                    {yylval.code=yytext;return IF;}
"int"                   {yylval.code=yytext;return INT;}
"return"                {yylval.code=yytext;return RETURN;}
"sizeof"                {yylval.code=yytext;return SIZEOF;}
"struct"                {yylval.code=yytext;return STRUCT;}
"void"                  {yylval.code=yytext;return VOID;}
"while"                 {yylval.code=yytext;return WHILE;}

{entier}            {yylval.code=yytext;return CONSTANT; }
{identificateur}    {yylval.code=yytext;return IDENTIFIER;}


"<"             {yylval.code=yytext;return LE_OP;}
">"             {yylval.code=yytext;return GE_OP;}
"<="                {yylval.code=yytext;return LES_OP;}
">="                {yylval.code=yytext;return GES_OP;}
"=="                {yylval.code=yytext;return EQ_OP;}
"!="                {yylval.code=yytext;return NE_OP;}
"&&"            {yylval.code=yytext;return AND_OP;}
"||"                {yylval.code=yytext;return OR_OP;}

"*"             {yylval.code=yytext;return '*';}
"-"             {yylval.code=yytext;return '-';}
"&"             {yylval.code=yytext;return '&';}
{pointeur}      {yylval.code=yytext;return PTR_OP;}

"+"             {yylval.code=yytext;return '+';}
"/"             {yylval.code=yytext;return '/';}

{commentaire}   {/* commentaire ne rien faire */}

","             {yylval.code=yytext;return ',';}
";"             {yylval.code=yytext;return ';';}
"("             {yylval.code=yytext;return '(';}
")"             {yylval.code=yytext;return ')';}
"{"             {yylval.code=yytext;return '{';}
"}"             {yylval.code=yytext;return '}';}
"="             {yylval.code=yytext;return '=';}
[ \t\n\v\r]+             { /* rien faire */ }
.                       { printf("erreur : b");printf("%s",yytext);printf("a\n");printf("Erreur lexicale. \n"); }
%%

and finally one of my test files

extern int printd( int i );

int main() {
  int i;
  int j;
  i = 45000;
  j = -123;
  printd(i+j);
  printd(45000+j);
  printd(i+123);
  printd(45000+123);
  printd(i+(j+0));
  printd((i+0)+j);
  printd((i+0)+(j+0));
  printd((i+0)+123);
  printd(45000+(j+0));
  return 0;
}

After those changes, the parser is blocking at the first "int" keyword.

Thanks for your help...


Solution

  • Your string handling needs work.

    This is never correct:

    yylval.code=yytext;
    

    yytext points into the internal temporary buffer used by the scanner to hold part of the input, and its contents will change. It might even be reallocated.

    If you need to keep the string corresponding to a token, you need to make a dynamically-allocated copy (and free the copy when you no longer need it). In the case of keyword and operator tokens, that may be considered unnecessary overhead; you could just use the token number as a key in a lookup table of string literals, or some other copy-avoidance mechanism.

    Moreover, your actions in you parser include calls like:

    strcpy($$, $1);
    

    But strcpy requires the destination to be a pointer to a valid character array known to be at least as long as the source. Since you don't initialize $$, that might be considered an unreasonable expectation. As it happens, $$ is preinitialized to $$, so the above is equivalent to strcpy($1, $1), which is an error, not a no-op: strcpy requires that the source and destination not overlap.

    A particularly egregious example occurs in the action for declaration_specifiers:

    char* temp = (char*)malloc((strlen($1)+ strlen($2))*sizeof(char));
    sprintf(temp,"%s %s",$1,$2);
    strcpy($$,temp);
    free(temp);
    

    First, as per the point about using yytext in you scanner actions, it is unlikely that $1 and $2 contain useful information at this point. Second, $$ is the same as $1, so it is pointing into the scanner's internal buffer. That means that the strcpy will overwrite the scanner's internal buffer with random text, possibly overrunning it. Nothing good can come from that.

    And in passing, the malloc used to create temp has the wrong length computation. It should be:

    char* temp = malloc(strlen($1) + strlen($2) + 2);
    

    because of the space character and the NUL terminator. So that's another buffer overrun. (And temp is unnecessary; you should be using $$ directly to hold the address of the newly allocated string.)