Search code examples
c, parsing, abstract-syntax-tree, bison, lexer

Error: unknown type name 'ASTNode' in Bison parser when integrating with Flex lexer


I'm working on a project where I'm using Bison to generate a parser and Flex to generate a lexer. My parser is meant to generate an Abstract Syntax Tree (AST), and I've defined the ASTNode structure within my Bison file. However, I keep encountering the following error:

In file included from lexer.l:5:0:
parser.y:39:5: error: unknown type name 'ASTNode'
    ASTNode *astNode;  // Add ASTNode pointer for expression and other nodes

This is my current parser file, parser.y:


%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern FILE *yyin;
extern FILE *yyout;

void yyerror(const char *s);

/* Kinds of AST node: two leaf kinds followed by the four binary operators. */
typedef enum {
    NODE_VAR,
    NODE_ICONST,
    NODE_ADD,
    NODE_SUB,
    NODE_MUL,
    NODE_DIV
} NodeType;

/* Declared up front so the struct body can use the short name for its children. */
typedef struct ASTNode ASTNode;

/*
 * One AST node. `type` selects which member of `data` is meaningful:
 *   NODE_VAR    -> data.sval
 *   NODE_ICONST -> data.ival
 *   operators   -> data.binary.left / data.binary.right
 */
struct ASTNode {
    NodeType type;
    union {
        char *sval;             /* variable name */
        int ival;               /* integer constant */
        struct {
            ASTNode *left;
            ASTNode *right;
        } binary;               /* operands of a binary operation */
    } data;
};

/* AST constructors and the tree printer; definitions live in the epilogue. */
ASTNode *createVarNode(char *name);
ASTNode *createIntNode(int value);
ASTNode *createBinaryNode(NodeType type, ASTNode *left, ASTNode *right);
void printAST(ASTNode *node);
%}

// Union for semantic values
%union {
    int ival;         // value of an ICONST token
    char *sval;       // heap-allocated lexeme of a VAR token (strdup in the lexer)
    ASTNode *astNode; // subtree built for a nonterminal
}

%token <sval> VAR
%token <ival> ICONST
%token EQUOP ADDOP SUBOP MULOP DIVOP LPAREN RPAREN

%type <astNode> program expression term factor

%start program

%%

/*
 * Rules of the grammar.
 *
 * expression and term are written left-recursively so that every operator
 * reduction has both operands at hand and can build a binary AST node.
 * This yields left-associative trees: a - b - c parses as (a - b) - c.
 */

/* program: a single assignment; its value is the AST of the right-hand side. */
program:
    VAR EQUOP expression {
        printf("Parsed program: %s = <expression>\n", $1);
        free($1); // Free the variable name after use
        $$ = $3;  // Set the result of the program to the expression
    }
;

/* expression: one or more terms joined by + or - */
expression:
    term {
        $$ = $1;
        printf("Parsed expression\n");
    }
    | expression ADDOP term {
        $$ = createBinaryNode(NODE_ADD, $1, $3);
        printf("Parsed ADDOP\n");
    }
    | expression SUBOP term {
        $$ = createBinaryNode(NODE_SUB, $1, $3);
        printf("Parsed SUBOP\n");
    }
;

/* term: one or more factors joined by * or / (binds tighter than + and -) */
term:
    factor {
        $$ = $1;
        printf("Parsed term\n");
    }
    | term MULOP factor {
        $$ = createBinaryNode(NODE_MUL, $1, $3);
        printf("Parsed MULOP\n");
    }
    | term DIVOP factor {
        $$ = createBinaryNode(NODE_DIV, $1, $3);
        printf("Parsed DIVOP\n");
    }
;

/* factor: a parenthesised expression, an integer constant, or a variable */
factor:
    LPAREN expression RPAREN {
        $$ = $2; // The result is the expression inside the parentheses
        printf("Parsed factor: (expression)\n");
        printAST($$); // Print the AST for the expression inside the parentheses
    }
    | ICONST {
        $$ = createIntNode($1); // Create an integer node
        printf("Parsed ICONST: %d\n", $1);
    }
    | VAR {
        $$ = createVarNode($1); // Create a variable node (the node copies the name)
        printf("Parsed VAR: %s\n", $1);
        free($1); // The node holds its own copy, so release the lexer string
    }
;

%% 

/* Report the message `s` on stderr, prefixed with "Error: " and followed by a newline. */
void yyerror(const char *s) {
    FILE *sink = stderr;

    fprintf(sink, "Error: %s\n", s);
}

// Function definitions for AST creation and printing
/*
 * Allocate a NODE_VAR leaf holding a private copy of `name`.
 *
 * The node owns the duplicated string; the caller keeps ownership of `name`.
 * Returns NULL if `name` is NULL or if either allocation fails.
 */
ASTNode* createVarNode(char *name) {
    if (name == NULL) {
        return NULL;
    }

    ASTNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    node->type = NODE_VAR;
    node->data.sval = strdup(name);
    if (node->data.sval == NULL) {
        free(node); /* never hand back a half-built node */
        return NULL;
    }
    return node;
}

/*
 * Allocate a NODE_ICONST leaf carrying `value`.
 *
 * Returns the new node, or NULL if the allocation fails.
 */
ASTNode* createIntNode(int value) {
    ASTNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    node->type = NODE_ICONST;
    node->data.ival = value;
    return node;
}

/*
 * Allocate an interior node of kind `type` with the given operands.
 *
 * The `left` and `right` pointers are stored as-is (not copied).
 * Returns the new node, or NULL if the allocation fails; in that case the
 * operands are left untouched.
 */
ASTNode* createBinaryNode(NodeType type, ASTNode *left, ASTNode *right) {
    ASTNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    node->type = type;
    node->data.binary.left = left;
    node->data.binary.right = right;
    return node;
}

/*
 * Print the tree rooted at `node` to stdout, one node per line, in pre-order
 * (operator first, then its left subtree, then its right subtree).
 * A NULL node, or a node whose type is not a known NodeType, prints nothing.
 */
void printAST(ASTNode *node) {
    const char *op_line = NULL;

    if (node == NULL) {
        return;
    }

    /* Leaves are printed directly and have no children to visit. */
    if (node->type == NODE_VAR) {
        printf("VAR(%s)\n", node->data.sval);
        return;
    }
    if (node->type == NODE_ICONST) {
        printf("ICONST(%d)\n", node->data.ival);
        return;
    }

    /* Every binary operator differs only in the line it prints. */
    switch (node->type) {
        case NODE_ADD: op_line = "ADD\n"; break;
        case NODE_SUB: op_line = "SUB\n"; break;
        case NODE_MUL: op_line = "MUL\n"; break;
        case NODE_DIV: op_line = "DIV\n"; break;
        default:       return;
    }

    fputs(op_line, stdout);
    printAST(node->data.binary.left);
    printAST(node->data.binary.right);
}

/*
 * Entry point: parse "input.txt", sending the lexer trace to
 * "lexicalOutput.txt".
 *
 * Returns 0 when both files open, the parse succeeds and the output file
 * closes cleanly; returns 1 otherwise.
 */
int main(void) {
    yyin = fopen("input.txt", "r");
    if (!yyin) {
        perror("Failed to open input file");
        return 1;
    }

    yyout = fopen("lexicalOutput.txt", "w");
    if (!yyout) {
        perror("Failed to open output file");
        fclose(yyin);
        return 1;
    }

    /* yyparse() returns 0 on success and nonzero on a syntax error or
     * memory exhaustion; carry that into the exit status. */
    int status = (yyparse() == 0) ? 0 : 1;

    fclose(yyin);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(yyout) != 0) {
        perror("Failed to close output file");
        status = 1;
    }

    return status;
}

And this is my current lexer file, lexer.l:

%{
/* C prologue copied into the generated scanner. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Generated by `bison -d parser.y`; provides the token codes (ICONST, VAR, ...)
 * and the yylval union that the rules below fill in. */
#include "parser.tab.h"

extern int yylex();
%}

/* Lexical analyzer rules */
%% 
[ \t\r\n]+ ; /* Ignore whitespace, including line breaks (otherwise the default rule echoes them to yyout) */
[0-9]+  {
    yylval.ival = atoi(yytext);
    fprintf(yyout, "Read ICONST: %d\n", yylval.ival);
    return ICONST;
}

[a-zA-Z_][a-zA-Z0-9_]* {
    /* The parser takes ownership of this copy and frees it. */
    yylval.sval = strdup(yytext);
    if (yylval.sval == NULL) {
        perror("strdup");
        exit(EXIT_FAILURE);
    }
    fprintf(yyout, "Read VAR: %s\n", yylval.sval);
    return VAR;
}

"=" { fprintf(yyout, "Read =\n"); return EQUOP; }
"+" { fprintf(yyout, "Read +\n"); return ADDOP; }
"-" { fprintf(yyout, "Read -\n"); return SUBOP; }
"*" { fprintf(yyout, "Read *\n"); return MULOP; }
"/" { fprintf(yyout, "Read /\n"); return DIVOP; }
"(" { fprintf(yyout, "Read (\n"); return LPAREN; }
")" { fprintf(yyout, "Read )\n"); return RPAREN; }

.   { /* Ignore unrecognized characters */ }
%%

/* Always returns 1, the value flex treats as "no further input" at end of file. */
int yywrap(void) {
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

I believe the error is occurring because the lexer isn't recognizing the ASTNode type from my Bison file, but I'm not sure how to resolve it as I have the AST node defined before specifying it as a type.

Command line commands to compile the lexer and bison file:

$ bison -d parser.y  
$ flex lexer.l
$ gcc -o myparser parser.tab.c lex.yy.c
[produces error shown above]

Solution

  • The cause of the error message is that the generated parser.tab.h file does not contain the definition of ASTNode. You can open that file yourself after running bison to see that. (The main thing in the generated .tab.h file is enum yytokentype, which allows the lexer to know the token codes that bison has chosen.) Consequently, when the compiler tries to compile lex.yy.c, the definition of ASTNode is not available.

    To fix this, you need to put the definition of ASTNode into its own header file and #include it in both parser.y and lexer.l.

    For example, create astnode.h:

    #ifndef ASTNODE_H
    #define ASTNODE_H

    /* Kinds of AST node: two leaf kinds followed by the four binary operators. */
    typedef enum {
        NODE_VAR,
        NODE_ICONST,
        NODE_ADD,
        NODE_SUB,
        NODE_MUL,
        NODE_DIV
    } NodeType;

    /* Declared up front so the struct body can use the short name for its children. */
    typedef struct ASTNode ASTNode;

    /*
     * One AST node. `type` selects which member of `data` is meaningful:
     *   NODE_VAR    -> data.sval
     *   NODE_ICONST -> data.ival
     *   operators   -> data.binary.left / data.binary.right
     */
    struct ASTNode {
        NodeType type;
        union {
            char *sval;             /* variable name */
            int ival;               /* integer constant */
            struct {
                ASTNode *left;
                ASTNode *right;
            } binary;               /* operands of a binary operation */
        } data;
    };

    #endif /* ASTNODE_H */
    

    Then remove those definitions from parser.y and add #include "astnode.h" to both files near the top. In lexer.l, it must come before #include "parser.tab.h" since the latter header needs the ASTNode definition.