Search code examples
antlr4context-free-grammar

Why isn't this a[1+2] accepted? - ANTLR4 Grammar error


/* ANTLR requires the grammar name to match the file name, so this grammar belongs in Grammar.g4 */
grammar Grammar;

/* non-terminals start with lowercase */

prog: (comment|declaration|NEWLINE|SPACE|definition|statement)*EOF;

comment:
    '(*' (ANYCHAR|VARIABLE|SPACE|NEWLINE|number|operator)* '*)'
;

declaration:
    vname_type '('args_def?')' SPACE* ';'
;

/* A name, a ':' and its type, optionally followed by a 'where' refinement */
vname_type:
    (VARIABLE) SPACE* ':' SPACE* type SPACE* refinement? /* Function names cannot start with digits */
;

type:
    (DOUBLE|INT|BOOLEAN|FLOAT|STRING)
;

refinement:
    'where' SPACE* (VARIABLE) SPACE* operator SPACE* (number|string_lit|TRUE|FALSE)
;

number:
    (NUMBER_INT|NUMBER_FLOAT)
;

string_lit:
    '"' (ANYCHAR|number|operator|SPACE)* '"'
;

statement:
    (return_statement|expression|value|if_statement|while_statement|arrays)
;

value:
    vname_type '=' SPACE* (number|string_lit|VARIABLE) SPACE* ';'
;

arrays:
    (VARIABLE '['position']' ';'
    |'get_array()['position']' ';'
    )
;

position:
    pos
    |pos SPACE* (MATH_OPERATOR) SPACE* pos
    |pos SPACE* (MATH_OPERATOR) SPACE* pos SPACE* (MATH_OPERATOR) SPACE* position
;
pos:
    (NUMBER_INT|VARIABLE)
;

expression:
    (function_call
    | (number|VARIABLE|ANYCHAR|SPACE)*';')
;

function_call:
    VARIABLE'('args_value? ')' ';'
;

return_statement:
    'return' SPACE* (VARIABLE|number) SPACE* ';'
;

definition:
    (function)
;

function:
    vname_type SPACE* '('SPACE* args_def? SPACE*')' SPACE*
    '{'
    (statement|SPACE)*
    '}'
;

if_statement:
    'if' SPACE* boolean_expression SPACE* '{'
    statement*
    '}' else_statement?
;

else_statement:
    'else' '{' statement* '}'
;

while_statement:
    'while' boolean_expression '{' statement* '}'
;

boolean_expression:
    (conditions_values
    | conditions_values operator conditions_values
    | (conditions_values operator conditions_values operator boolean_expression)
    )
;

conditions_values:
    VARIABLE
    |TRUE
    |FALSE
    |number
;

args_def:
    vname_type SPACE* (',' vname_type SPACE*)*
;

args_value:
    (number|string_lit|TRUE|FALSE) (',' (number|string_lit|TRUE|FALSE))*
;

operator:
    (MATH_OPERATOR | BOOLEAN_OPERATOR)
;

/* terminals start with uppercase, and can be defined using regular expressions. */

DOUBLE: 'Double';
INT: 'Int';
BOOLEAN: 'Boolean';
FLOAT: 'Float';
STRING: 'String';
TRUE: 'true';
FALSE: 'false';
NUMBER_INT: [0-9_]+; /* Underscore can be in any position */
NUMBER_FLOAT: ('.'[0-9]+|[0-9]+.[0-9]+);
MATH_OPERATOR: '+' | '-' | '*' | '/' | '%';
BOOLEAN_OPERATOR: '&&' | '||' | '==' | '!=' | '>=' | '<=' | '<' | '>' ;
NEWLINE : [\r\n]+ -> skip;
SPACE: (' '|'\t') -> skip;
VARIABLE: [a-zA-Z_][a-zA-Z0-9_]*;
ANYCHAR: (.)+?;

So this is my grammar, and I can't figure out why I get the following error when trying to parse a[1+2]: mismatched input '1+2' expecting {NUMBER_INT, VARIABLE}

I'm trying to make a new language, and I'm trying to access a position of an array. I don't know what else to change in the grammar; I've tried changing the order of the rules in case that was the problem. But if I write a[1+ 2] it works, so I don't understand how the space makes such a big difference.


Solution

  • The fact that your grammar does not accept a[1+2] but does accept a[1+ 2] is because of the rule:

    NUMBER_FLOAT: ('.'[0-9]+|[0-9]+.[0-9]+);
    //                             ^
    //                             ^
    //                             ^
    //                             This matches any char!
    

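    causing 1+2 to become a single NUMBER_FLOAT token instead of 3 separate tokens. (In a[1+ 2] the character after the + is a space rather than a digit, so NUMBER_FLOAT cannot match and the lexer produces 1, + and 2 as separate tokens.) You need to wrap the . in quotes: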

    NUMBER_FLOAT
     : '.' [0-9]+
     | [0-9]+ '.' [0-9]+
     ;
    

    which could be written as:

    NUMBER_FLOAT
     : [0-9]* '.' [0-9]+
     ;
    

    I highly suggest you make a more general expression rule and use that rule in your arrays rule (and other rules):

    arrays
     : VARIABLE '[' expression ']' ';'
     | 'get_array' '(' ')' '[' expression ']' ';'
     ;
    
    expression
     : '(' expression ')'
     | expression MATH_OPERATOR expression
     | expression BOOLEAN_OPERATOR expression
     | function_call
     | NUMBER_INT
     | NUMBER_FLOAT
     | VARIABLE
     | ANYCHAR
     | TRUE
     | FALSE
     ;
    

    Also, remove the SPACEs from your parser rules since you are already skipping them in the lexer.

    A quick demo:

    grammar Grammar;
    
    prog
     : (COMMENT | declaration | definition | statement)* EOF
     ;
    
    declaration
     : vname_type '(' args_def? ')' ';'
     ;
    
    vname_type
     : VARIABLE ':' type refinement?
     ;
    
    type
     : DOUBLE
     | INT
     | BOOLEAN
     | FLOAT
     | STRING
     ;
    
    refinement
     : 'where' expression
     ;
    
    // One construct inside a program or a block. An expression used as a
    // statement is terminated by ';' here, which keeps the expression rule
    // itself free of terminators so that it can be nested.
    statement
     : return_statement
     | expression ';'
     | value
     | if_statement
     | while_statement
     | arrays
     ;
    
    // Typed variable definition with an initial value, e.g. x: Int = 1+2;
    value
     : vname_type '=' expression ';'
     ;
    
    // Indexing statement; the index is an arbitrary expression, e.g. a[1+2];
    arrays
     : VARIABLE '[' expression ']' ';'
     | 'get_array' '(' ')' '[' expression ']' ';'
     ;
    
    // General expression. In an ANTLR4 left-recursive rule the alternatives
    // listed first bind tighter, so MATH_OPERATOR binds tighter than
    // BOOLEAN_OPERATOR. All operators inside one token type share a single
    // precedence level.
    expression
     : '(' expression ')'
     | expression MATH_OPERATOR expression
     | expression BOOLEAN_OPERATOR expression
     | function_call
     | NUMBER_INT
     | NUMBER_FLOAT
     | STRING_LIT   // string literals are tokenized by the lexer's STRING_LIT rule
     | VARIABLE
     | ANYCHAR
     | TRUE
     | FALSE
     ;
    
    // A call is an expression, so it carries no ';' of its own: the enclosing
    // statement supplies the terminator. This lets calls be nested, as in
    // a[f(1)]; or return f(1);
    function_call
     : VARIABLE '(' args_value? ')'
     ;
    
    return_statement
     : 'return' expression ';'
     ;
    
    definition
     : function
     ;
    
    function
     : vname_type '('args_def? ')' '{' statement* '}'
     ;
    
    if_statement
     : 'if' expression '{' statement* '}' else_statement?
     ;
    
    else_statement
     : 'else' '{' statement* '}'
     ;
    
    while_statement
     : 'while' expression '{' statement* '}'
     ;
    
    args_def
     : vname_type (',' vname_type)*
     ;
    
    args_value
     : expression (',' expression)*
     ;
    
    // Lexer rules. The lexer takes the longest match; when several rules match
    // the same text, the rule defined first wins. Keywords therefore come
    // before VARIABLE, and the catch-all ANYCHAR comes last.

    // Non-greedy body: the comment ends at the first '*)'.
    COMMENT: '(*' .*? '*)';
    DOUBLE: 'Double';
    INT: 'Int';
    BOOLEAN: 'Boolean';
    FLOAT: 'Float';
    STRING: 'String';
    TRUE: 'true';
    FALSE: 'false';
    // Digits with underscores allowed in any position; at least one digit is required.
    NUMBER_INT: [0-9_]* [0-9] [0-9_]*;
    // Optional integer part, a literal '.', then at least one digit.
    NUMBER_FLOAT: [0-9]* '.' [0-9]+;
    MATH_OPERATOR: '+' | '-' | '*' | '/' | '%';
    BOOLEAN_OPERATOR: '&&' | '||' | '==' | '!=' | '>=' | '<=' | '<' | '>' ;
    // Whitespace is discarded by the lexer, so parser rules never see it.
    SPACES: [ \t\r\n] -> skip;
    VARIABLE: [a-zA-Z_] [a-zA-Z0-9_]*;
    // Non-greedy body: the literal ends at the first closing quote.
    STRING_LIT: '"' .*? '"';
    // Fallback: any single character that no earlier rule matches.
    ANYCHAR: . ;
    

    If you now parse a[1+2];, you'll get:

    enter image description here