Tags: python, yacc, lex, ply

Syntax error in Python PLY parser


I'm writing a simplified MODULA-2 grammar using Python PLY.

But I'm getting a syntax error:

$ python3 m2.py
Syntax error at 'MODULE'

and I cannot figure out what the problem with the rules is.

Here is the grammar:

import ply.lex as lex
import ply.yacc as yacc

# =============================================================================
# Lexer rules
# =============================================================================

tokens = (
    # Keywords
    'RETURN', 'IF', 'THEN', 'VAR', 'MODULE', 'BEGIN', 'END',
    # Constants
    'NUMBER',
    # Operators
    'PLUS', 'MINUS', 'TIMES', 'DIV', 'MOD', 'ASSIGN_OP',
    # Separators
    'LPAR', 'RPAR', 'PERIOD', 'COLON', 'SEMICOLON',
    # Identifier
    'IDENT',
    )

# Tokens

t_NUMBER        = r'\d+'
t_PLUS          = r'\+'
t_MINUS         = r'-'
t_TIMES         = r'\*'
t_LPAR          = r'\('
t_RPAR          = r'\)'
t_PERIOD        = r'\.'
t_COLON         = r':'
t_SEMICOLON     = r';'
t_ASSIGN_OP     = r':='
t_IDENT         = r'[a-zA-Z][a-zA-Z0-9]*'

# Ignored characters
t_ignore = ' \t'

def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)

# Build the lexer
lexer = lex.lex()


# =============================================================================
# Parser rules
# =============================================================================

precedence = (
    ('left', 'PLUS', 'MINUS'),
    ('left', 'TIMES', 'DIV'),
)

def p_add_operator(t):
    """ add_operator : PLUS
                     | MINUS
    """
    pass

def p_mul_operator(t):
    """ mul_operator : TIMES
                     | DIV
                     | MOD
    """
    pass

def p_simple_expression(t):
    """ expression : term
                   | expression add_operator term
    """
    pass

def p_term(t):
    """ term : factor
             | term mul_operator factor
    """
    pass

def p_factor(t):
    """ factor : NUMBER
               | IDENT
               | LPAR expression RPAR
    """
    pass

def p_statement(t):
    """ statement : IDENT
                  | IDENT ASSIGN_OP expression
                  | IF expression THEN statement_sequence END
                  | RETURN expression
    """
    pass

def p_statement_sequence(t):
    """ statement_sequence : statement
                           | statement_sequence SEMICOLON statement
    """
    pass

def p_block(t):
    """ block : declaration_list BEGIN statement_sequence END
    """
    pass

def p_declaration_list(t):
    """ declaration_list : declaration
                         | declaration_list declaration
    """
    pass

def p_declaration(t):
    """ declaration : VAR IDENT COLON IDENT SEMICOLON
    """
    pass

def p_program_module(t):
    """ program_module : MODULE IDENT SEMICOLON block IDENT PERIOD
    """
    pass

def p_error(t):
    print("Syntax error at '%s'" % t.value)

parser = yacc.yacc(start='program_module')

if __name__ == "__main__":
    s = "MODULE test; VAR x: INTEGER; BEGIN x := 10 END test."
    parser.parse(s)

The interesting thing is that the same grammar rules written for lex/yacc work fine. Can somebody help me with this?


Solution

  • AFAIK, ply.lex does not have enough magic to know that you want the special word MODULE to become the MODULE token; your t_IDENT pattern simply matches it as an ordinary identifier.

    With your definition, the simple test:

    lexer.input("MODULE test; VAR x: INTEGER; BEGIN x := 10 END test.")
    for tok in lexer:
        print(tok)
    

    outputs:

    LexToken(IDENT,'MODULE',1,0)
    LexToken(IDENT,'test',1,7)
    LexToken(SEMICOLON,';',1,11)
    LexToken(IDENT,'VAR',1,13)
    LexToken(IDENT,'x',1,17)
    LexToken(COLON,':',1,18)
    LexToken(IDENT,'INTEGER',1,20)
    LexToken(SEMICOLON,';',1,27)
    LexToken(IDENT,'BEGIN',1,29)
    LexToken(IDENT,'x',1,35)
    LexToken(ASSIGN_OP,':=',1,37)
    LexToken(NUMBER,'10',1,40)
    LexToken(IDENT,'END',1,43)
    LexToken(IDENT,'test',1,47)
    LexToken(PERIOD,'.',1,51)
    

    The correct way to process keywords is to identify them inside the IDENT token:

    # =============================================================================
    # Lexer rules
    # =============================================================================
    # Keywords
    keywords = ( 'RETURN', 'IF', 'THEN', 'VAR', 'MODULE', 'BEGIN', 'END' )
    tokens = keywords + (
        # Constants
        'NUMBER',
        ...
    

    and

    def t_IDENT(t):
        r'[a-zA-Z][a-zA-Z0-9]*'
        if t.value in keywords:  # is this a keyword
            t.type = t.value
        return t
    

    The same lexer check now correctly gives:

    LexToken(MODULE,'MODULE',1,0)
    LexToken(IDENT,'test',1,7)
    LexToken(SEMICOLON,';',1,11)
    LexToken(VAR,'VAR',1,13)
    LexToken(IDENT,'x',1,17)
    LexToken(COLON,':',1,18)
    LexToken(IDENT,'INTEGER',1,20)
    LexToken(SEMICOLON,';',1,27)
    LexToken(BEGIN,'BEGIN',1,29)
    LexToken(IDENT,'x',1,35)
    LexToken(ASSIGN_OP,':=',1,37)
    LexToken(NUMBER,'10',1,40)
    LexToken(END,'END',1,43)
    LexToken(IDENT,'test',1,47)
    LexToken(PERIOD,'.',1,51)
    

    and parsing now completes with no error.
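
    As a variant, the PLY manual suggests keeping the reserved words in a dictionary that maps each spelling to its token type. A minimal sketch of that approach (only the keyword handling changes; the remaining t_* rules and the grammar stay as in the question):

    # Reserved words: source spelling -> token type.
    # Both happen to be identical here, since MODULA-2 keywords are uppercase.
    reserved = {
        'RETURN': 'RETURN', 'IF': 'IF', 'THEN': 'THEN', 'VAR': 'VAR',
        'MODULE': 'MODULE', 'BEGIN': 'BEGIN', 'END': 'END',
    }

    tokens = tuple(reserved.values()) + (
        # Constants
        'NUMBER',
        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIV', 'MOD', 'ASSIGN_OP',
        # Separators
        'LPAR', 'RPAR', 'PERIOD', 'COLON', 'SEMICOLON',
        # Identifier
        'IDENT',
    )

    def t_IDENT(t):
        r'[a-zA-Z][a-zA-Z0-9]*'
        # Reclassify the identifier if it is a reserved word.
        t.type = reserved.get(t.value, 'IDENT')
        return t

    This is equivalent to the tuple membership test above; the dictionary form only pays off when the keyword spelling differs from the token name (for example, lowercase keywords).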